SINGA-29 Update NeuralNet class to enable customizing layer partition type

1. Clean the code for the NeuralNet and Graph classes.
   The Graph class only provides functions for Node and Edge management, e.g.,
   add, remove and topology sort.
   NeuralNet provides one function (CreateGraph) to convert the net configuration
   into a Graph; net partitioning is done inside CreateGraph. The
   CreateNetFromGraph function creates and connects layers from the graph.
2. Users can customize the partitioning of the whole net and of a specific layer
   through the partition_dim field of NetProto and LayerProto.
   The LayerProto setting overrides that of NetProto (see the sketch after this
   list).
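
For illustration, a minimal C++ sketch of how this could be driven from the
protobuf configuration. It is an assumed usage example, not code from this
commit: the add_layer() accessor (i.e., the name of NetProto's repeated layer
field) and the singa::kTrain enum constant are assumptions based on the usual
protobuf-generated API; partition_dim and NeuralNet::Create() are from this
change.

    #include "neuralnet/neuralnet.h"
    #include "proto/model.pb.h"

    void BuildPartitionedNet() {
      singa::NetProto netproto;
      // Net-wide default: dim 0 partitions the mini-batch into sub-mini-batches.
      netproto.set_partition_dim(0);
      // Assumed accessor for the repeated layer field of NetProto.
      singa::LayerProto* fc = netproto.add_layer();
      fc->set_name("fc1");
      // The per-layer setting overrides the net-wide one: dim 1 slices this
      // layer's feature vector into sub-vectors.
      fc->set_partition_dim(1);
      // Create() builds the graph (CreateGraph), partitions it into 2 parts,
      // then instantiates and connects layers (CreateNetFromGraph).
      auto net = singa::NeuralNet::Create(netproto, singa::kTrain, 2);
    }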

Tested on a single process with non-distributed training and shared-memory
hogwild (one worker group with 2 workers).
Tested with two processes for Downpour and distributed hogwild. Downpour has
performance similar to shared-memory hogwild, while distributed hogwild does not
perform as well as non-distributed training.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9a6e09fa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9a6e09fa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9a6e09fa

Branch: refs/heads/master
Commit: 9a6e09fa2e56ea4c2563264c378ee9f3eb314acf
Parents: ea7cfea
Author: wang wei <[email protected]>
Authored: Sat Jul 11 14:31:54 2015 +0800
Committer: wang sheng <[email protected]>
Committed: Sun Jul 12 00:44:29 2015 +0800

----------------------------------------------------------------------
 include/neuralnet/base_layer.h | 493 +++++++++++-----------------
 include/neuralnet/layer.h      | 215 ++++---------
 include/neuralnet/neuralnet.h  | 152 ++++-----
 include/trainer/server.h       |   9 +-
 include/trainer/trainer.h      |  15 +-
 include/trainer/worker.h       |  23 +-
 include/utils/common.h         |  76 ++---
 include/utils/graph.h          | 201 +++++-------
 include/utils/param.h          |   2 +-
 include/utils/updater.h        |  12 +-
 src/neuralnet/base_layer.cc    | 191 +++++------
 src/neuralnet/layer.cc         | 435 ++++++++++++-------------
 src/neuralnet/neuralnet.cc     | 622 ++++++++++++++++--------------------
 src/proto/common.proto         |   7 +
 src/proto/model.proto          |  23 +-
 src/trainer/server.cc          |  12 +-
 src/trainer/trainer.cc         |  63 ++--
 src/trainer/worker.cc          | 110 +++----
 src/utils/common.cc            |  48 +++
 src/utils/graph.cc             | 294 +++++++++--------
 src/utils/param.cc             |   3 +-
 src/utils/updater.cc           |  10 +-
 22 files changed, 1313 insertions(+), 1703 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/neuralnet/base_layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/base_layer.h b/include/neuralnet/base_layer.h
index 8b5b1bf..047e43d 100644
--- a/include/neuralnet/base_layer.h
+++ b/include/neuralnet/base_layer.h
@@ -1,14 +1,11 @@
-#ifndef INCLUDE_BASE_LAYER_H_
-#define INCLUDE_BASE_LAYER_H_
+#ifndef SINGA_NEURALNET_BASE_LAYER_H_
+#define SINGA_NEURALNET_BASE_LAYER_H_
 
 #include <vector>
 #include <string>
 #include <map>
-#include <functional>
 #include <utility>
 #include <memory>
-#include <chrono>
-#include <algorithm>
 #include <thread>
 
 #include "proto/model.pb.h"
@@ -17,162 +14,110 @@
 #include "utils/common.h"
 #include "utils/blob.h"
 
+namespace singa {
+
 using std::vector;
-using std::shared_ptr;
-using std::make_shared;
 using std::string;
 using std::map;
 
-namespace singa{
 
 class Layer;
-typedef shared_ptr<Layer> SLayer;
 /**
  * Base layer class.
- * Children should implement at least Layer::Setup, Layer::ComputeFeature(),
- * Layer::ComputGradient() functions for backpropagation method;
- * TODO(zhaojing) subclass the base layer class to support contrastive divergence,
- * The identifier of each layer is the literal string of the class name without
- * the suffix "Layer", which is used in layer registration and creation.
+ *
+ * Children should implement at least
+ * Layer::ComputeFeature() and Layer::ComputGradient()
+ * functions for contrastive-divergence/back-propagation algorithm.
  */
 class Layer {
  public:
-  Layer(){}
-  virtual ~Layer(){}
-  /**
-   * Layer initialization.
-   *
-   * It simply saves the proto configuation, most initializations are done by
-   * Setup().
-   *
-   * @param proto user defined layer configuration
-   */
-  virtual void Init(const LayerProto &proto);
-  /**
-   * Copy layer configuration from the other Layer, and use the shape argument
-   * to as its data shape.
-   */
-  void Init(const Layer& other, const vector<int>& shape);
-  /**
-   * TODO(wangsheng) Marshal layer properties and data into google protobuf
-   * object (i.e., snapshot).
-   *
-   * Parameters are marshalled separately into another object (i.e., model).
-   *
-   * @param layer_proto
-   * @param copyData if true marshal layer data, e.g., feature value
-   */
-  virtual void ToProto(LayerProto *layer_proto, bool copyData);
+  Layer() { }
+  virtual ~Layer() {}
   /**
    * Setup layer properties.
    *
    * Setup the shapes for data and parameters, also setup some properties
-   * based on the layer configuration and connected src layers.
+   * based on the layer configuration and connected layers.
    *
-   * @param srclayers layers connecting to this layer
-   */
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers)=0;
-  /**
-   * \copydoc Setup(const LayerProto&, const vector<SLayer>&)
+   * @param proto layer configuration.
+   * @param npartitions num of total partitions of the original layer. This
+   * layer should be setup as one partition.
    */
-  virtual void Setup();
+  virtual void Setup(const LayerProto& proto, int npartitions = 1);
+
   /**
-   * Setup the layer properties except shape.
+   * Compute features of this layer based on connected layers.
    *
-   * The shape is already set and passed in to set other properties.
-   * properties are set according to shapes of itself and connected layers, and
-   * configuration. this should not change the current shape_(
-   * shape check is done outside the function).
+   * @param phase kTrain, kTest, kPositive, etc.
    */
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers)=0;
+  virtual void ComputeFeature(Phase phase, Metric* perf) = 0;
   /**
-   * \copybrief SetupAfterPartition(const LayerProto&, const vector<int> &,
-   * const vector<SLayer>& ).
-   */
-  virtual void SetupAfterPartition();
-  /**
-   * Layers that have paramters must overload this function.
+   * Compute gradients for parameters and connected layers.
    *
-   * @return parameters associated with this layer
+   * @param phase kTrain, kTest, kPositive, etc.
    */
-  virtual vector<shared_ptr<Param>> GetParams(){
-    return vector<shared_ptr<Param>>();
-  }
+  virtual void ComputeGradient(Phase phase) = 0;
+
   /**
-   * Compute features of this layer based on connected layers.
-   *
-   * Implement forward propagation for BP.
-   * TODO(zhaojing) Implement both postive phase and negative phase for CD.
+   * For print debug info about each layer, e.g., norm of feature vector,
+   * norm of parameters.
    *
-   * @param training true if in training phase
-   * @param srclayers layers connecting to this layer
+   * @param step training/test/validation step
+   * @param phase forward/backward/positive/negative...
+   * @return debug info about this layer.
    */
-  virtual void ComputeFeature(Phase phase, const vector<SLayer>& srclayers)=0;
+  const string DebugString(int step, Phase phase);
   /**
-   * \copybrief ComputeFeature(const vector<SLayer>& srclayers)
-   */
-  virtual void ComputeFeature(Phase phase);
-  /**
-   * Compute gradients for parameters and connecting layers.
-   *
-   * Implement backward propagation for BP.
-   * TODO(zhaojing) Calculate gradients for parameters for CD.
+   * Layers that have paramters must override this function.
    *
-   * @param srclayers layers connecting to this layer.
-   */
-  virtual void ComputeGradient(const vector<SLayer>& srclayers)=0;
-  /**
-   * \copybrief ComputeGradient(const vector<SLayer>& srclayers)
+   * @return parameters associated with this layer
    */
-  virtual void ComputeGradient();
+  virtual const vector<Param*> GetParams() const {
+    return vector<Param*> {};
+  }
   /**
-   * Decide on which dimension to do the partitioning.
+   * Return the connection type between one neuron of this layer and
+   * its source layer.
+   * Currently support two connection types: kOneToOne, and kOneToAll.
+   * kOneToOne indicates the neuron depends on only one neuron from src layer.
+   * kOneToAll indicates the neuron depends on all neurons from src layer.
+   * TODO support kOneToMany.
    *
-   * @mode kLayer, kData, kNone (no partition)
-   * @return the partition dimension, -1 for no partition
+   * @param k index of source layer (current only support k = 0.
+   * @param connection type.
    */
-  virtual int partition_dimension() const {
-    int ret=0;
-    if(partition_type()==kLayerPartition)
-      ret= 1;
-    else if(partition_type()==kNone)
-      ret= -1;
-    return ret;
+  virtual ConnectionType src_neuron_connection(int k) const {
+    // CHECK_LT(k, srclayers_.size());
+    return kOneToOne;
   }
 
   /**
-   * Return connection type between two layers.
+   * Return the connection type of this layer and all dst layers.
    *
-   * Currently support two connections: kOneToOne, and kOneToAll.
-   * kOneToOne indicates the dst neuron depends on only one neuron from src
-   * layer. kOneToAll indicates the dst neuron depends on all neurons from src
-   * layer. TODO support kOneToMany.
+   * Currently support two connection types: kOneToOne, and kOneToMany.
+   * kOneToOne indicates the users implement the ComputeFeature and
+   * ComputeGradient function considering only one dest layer. In this case,
+   * a SplitLayer will be added automatically to connect this layer with all
+   * dest layer.
+   * kOneToMany indicates the users has already considered multiple dest layers
+   * in the implementation.
+   * @return connection type default is kOneToOne.
    */
-  virtual ConnectionType connection_type(int k) const {
-    CHECK_LT(k, srclayers_.size());
+  virtual ConnectionType dst_layer_connection() const {
     return kOneToOne;
   }
   /**
-   * @return partition type of this layer, e.g., kNone, kLayer or kData.
+   * @return partition dimension of this layer.
+   * -1 for no partition;
+   *  0 for partition the mini-batch into sub-mini-batch.
+   *  1 for partition the layer feature vector into sub-vector.
    */
-  virtual PartitionType partition_type() const {
-    return layer_proto_.partition_type();
+  virtual int partition_dim() const {
+    return layer_proto_.partition_dim();
   }
-  /**
-   * partition id is the ID of the layer in the original layer.
-   */
-  virtual void set_partitionid(int id){
-    layer_proto_.set_partitionid(id);
-  }
-  virtual int partitionid() const {
-    return layer_proto_.partitionid();
-  }
-  virtual void set_name(string name){
-    name_=name;
-    layer_proto_.set_name(name);
+
+  virtual int partition_id() const {
+    return layer_proto_.partition_id();
   }
   virtual int type() const {
     return layer_proto_.type();
@@ -187,22 +132,18 @@ class Layer {
    * @return name of src data blob, used by prefetch layer to locate the data
    * blob in parser layers; The default value is "unknown"; If the
    * src layer is the prefetch layer and there are more than one parser layers,
-   * this value value be set.
+   * this value be set.
    */
   const std::string &datablob() const {
     return layer_proto_.datablob();
   }
-  const vector<int>& shape(const Layer* layer) const{
-    return data(layer).shape();
-  }
-
   /**
    * @return a const ref for Blob storing neuron values of this layer for BP
    */
   virtual const Blob<float>& data(const Layer* from) const {
     return data_;
   }
-  virtual Blob<float>* mutable_data(const Layer* from){
+  virtual Blob<float>* mutable_data(const Layer* from) {
     return &data_;
   }
 
@@ -215,37 +156,36 @@ class Layer {
   virtual Blob<float>* mutable_grad(const Layer* from) {
     return &grad_;
   }
-
   /**
    * return LayerS that connected to this layer
    */
-  virtual const vector< SLayer> srclayers() const {
+  virtual const vector<Layer*> srclayers() const {
     return srclayers_;
   }
   /**
    * return LayerS that this layer connected to
    */
-  virtual const vector<SLayer> dstlayers() const {
+  virtual const vector<Layer*> dstlayers() const {
     return dstlayers_;
   }
 
-  virtual const int srclayers_size() const {
+  virtual int srclayers_size() const {
     return srclayers_.size();
   }
-  virtual const int dstlayers_size() const {
+  virtual int dstlayers_size() const {
     return dstlayers_.size();
   }
-  virtual void ClearDstLayers() {
+  virtual void clear_dstlayers() {
     dstlayers_.clear();
   }
-  virtual void ClearSrcLayers() {
+  virtual void clear_srclayers() {
     srclayers_.clear();
   }
 
-  virtual void AddSrcLayer(SLayer src){
+  virtual void add_srclayer(Layer* src) {
     srclayers_.push_back(src);
   }
-  virtual void AddDstLayer(SLayer dst){
+  virtual void add_dstlayer(Layer* dst) {
     dstlayers_.push_back(dst);
   }
 
@@ -264,11 +204,11 @@ class Layer {
   virtual bool is_bridgedstlayer() const {
     return false;
   }
-protected:
-  string name_;
-  Blob<float> data_, grad_;
+
+ protected:
   LayerProto layer_proto_;
-  vector<SLayer> srclayers_, dstlayers_;
+  Blob<float> data_, grad_;
+  vector<Layer*> srclayers_, dstlayers_;
 };
 
 /**
@@ -277,42 +217,44 @@ protected:
  */
 class BridgeSrcLayer: public Layer {
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
+  using Layer::data;
+  using Layer::mutable_data;
+  using Layer::grad;
+  using Layer::mutable_grad;
+  using Layer::is_bridgesrclayer;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition();
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
+  void ComputeFeature(Phase phase, Metric* perf) override {}
+  void ComputeGradient(Phase phase) override {
+    ready_ = false;
+  }
 
-  virtual void ComputeFeature(Phase phase, const vector<SLayer>& srclayers);
-  virtual void ComputeGradient(const vector<SLayer>& srclayers);
-  virtual const Blob<float>& data(const Layer* from) const {
+  const Blob<float>& data(const Layer* from) const override {
     return srclayers_[0]->data(this);
   }
-  virtual Blob<float>* mutable_data(const Layer* from){
+  Blob<float>* mutable_data(const Layer* from) override {
     return srclayers_[0]->mutable_data(this);
   }
-
-  virtual const Blob<float>& grad(const Layer* from) const {
+  const Blob<float>& grad(const Layer* from) const override {
     return srclayers_[0]->grad(this);
   }
-  virtual Blob<float>* mutable_grad(const Layer* from) {
+  Blob<float>* mutable_grad(const Layer* from) override {
     return srclayers_[0]->mutable_grad(this);
   }
-  int dst_partition() const;
-  virtual bool is_bridgesrclayer() const {
+
+  bool is_bridgesrclayer() const override {
     return true;
   }
-  virtual void set_ready(bool a) {
-    ready_=a;
+  void set_ready(bool a) {
+    ready_ = a;
   }
-  virtual bool ready() const {
+  bool ready() const {
     return ready_;
   }
+
  protected:
+  //!< true if received grad from BridgeDstLayer
   bool ready_;
 };
 /**
@@ -321,30 +263,26 @@ class BridgeSrcLayer: public Layer {
  */
 class BridgeDstLayer: public Layer {
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition();
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
-
-  virtual void ComputeFeature(Phase phase, const vector<SLayer>& srclayers){
-    ready_=false;
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric* perf) override {
+    // reset ready_ for next iteration.
+    ready_ = false;
   }
-  virtual void ComputeGradient(const vector<SLayer>& srclayers){}
-  virtual bool is_bridgedstlayer() const {
+  void ComputeGradient(Phase phase) override {}
+  bool is_bridgedstlayer() const {
     return true;
   }
-  virtual void set_ready(bool a) {
-    ready_=a;
+  void set_ready(bool ready) {
+    ready_ = ready;
   }
-  virtual bool ready() const {
+  bool ready() const {
     return ready_;
   }
  protected:
+  //!< true if received data from BridgeSrcLayer
   bool ready_;
 };
 
@@ -353,71 +291,52 @@ class BridgeDstLayer: public Layer {
  */
 class ConcateLayer: public Layer {
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition();
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
-
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric* perf) override;
+  void ComputeGradient(Phase phase) override;
 };
 
-
 /**
  * Base layer for reading records from local Shard, HDFS, lmdb, etc.
- * Cannot be partitioned, always returns kNone for partition type.
  */
-
 class DataLayer: public Layer{
  public:
-  using Layer::Setup;
-  using Layer::ComputeFeature;
   using Layer::ComputeGradient;
+  using Layer::mutable_data;
+  using Layer::mutable_grad;
+  using Layer::dst_layer_connection;
 
-  virtual void ComputeFeature(Phase phase, const vector<SLayer>& srclayers)=0;
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers)=0;
-  virtual bool is_datalayer() const {
+  void ComputeGradient(Phase phase) override {}
+  bool is_datalayer() const override {
     return true;
   }
-  virtual void ComputeGradient(const vector<SLayer>& srclayers){};
-  virtual const vector<Record>& records() const {
-    return records_;
-  }
-  virtual void Setup(){
-    vector<SLayer> dummy;
-    Setup(layer_proto_,dummy);
-    has_setup_=true;
+  Blob<float>* mutable_data(const Layer* layer) override {
+    return nullptr;
   }
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
-
-  virtual void SetupAfterPartition(){
-    if(!has_setup_)
-    Setup();
+  Blob<float>* mutable_grad(const Layer* layer) override {
+    return nullptr;
   }
-  virtual PartitionType partition_type () const {
-    return kNone;
+  ConnectionType dst_layer_connection() const override {
+    return kOneToMany;
   }
 
-  virtual int batchsize() const=0;
+  int batchsize() const {
+    return batchsize_;
+  }
   virtual const Record& sample() const {
     return sample_;
   }
-
-  virtual Blob<float>* mutable_data(const Layer* layer) {
-    return nullptr;
-  }
-  virtual Blob<float>* mutable_grad(const Layer* layer) {
-    return nullptr;
+  /**
+   * @return the loaded records
+   */
+  virtual const vector<Record>& records() const {
+    return records_;
   }
+
  protected:
-  bool has_setup_;
   int random_skip_, batchsize_;
   Record sample_;
   vector<Record> records_;
@@ -432,36 +351,29 @@ class DataLayer: public Layer{
  */
 class PrefetchLayer : public Layer {
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
-  using Layer::SetupAfterPartition;
 
-  virtual ~PrefetchLayer();
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void ComputeFeature(Phase phase, const vector<SLayer>& srclayers);
-  virtual void ComputeGradient(const vector<SLayer>& srclayers){};
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric* perf) override;
+  void ComputeGradient(Phase phase) override {};
 
-  virtual const Blob<float>& data(const Layer* from) const ;
-  virtual Blob<float>* mutable_data(const Layer* layer) ;
+  const Blob<float>& data(const Layer* from) const override;
+  Blob<float>* mutable_data(const Layer* layer) override;
 
-  virtual Blob<float>* mutable_grad(const Layer* layer){
+  Blob<float>* mutable_grad(const Layer* layer) override {
     return nullptr;
   }
-  virtual const Blob<float>& grad(const Layer* from) const {
-    CHECK(false)<<"Loss layer has not gradient blob";
+  const Blob<float>& grad(const Layer* from) const override {
+    CHECK(false) << "Loss layer has not gradient blob";
     return grad_;
   }
-  virtual PartitionType partition_type () const {
-    return kNone;
-  }
 
   void Prefetch(Phase phase);
+  virtual ~PrefetchLayer();
+
  protected:
-  vector<shared_ptr<Layer>> sublayers_;
+  vector<Layer*> sublayers_;
   map<string, Blob<float>> datablobs_;
   std::thread thread_;
 };
@@ -471,46 +383,46 @@ class PrefetchLayer : public Layer {
  */
 class SliceLayer: public Layer {
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition();
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
-
-  virtual const Blob<float>& data(const Layer* layer) const;
-  virtual const Blob<float>& grad(const Layer* layer) const;
-  virtual Blob<float>* mutable_data(const Layer* layer);
-  virtual Blob<float>* mutable_grad(const Layer* layer);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric* perf) override;
+  void ComputeGradient(Phase phase) override;
+  ConnectionType dst_layer_connection() const override {
+    return kOneToMany;
+  }
+  const Blob<float>& data(const Layer* layer) const override;
+  const Blob<float>& grad(const Layer* layer) const override;
+  Blob<float>* mutable_data(const Layer* layer) override;
+  Blob<float>* mutable_grad(const Layer* layer) override;
+
  protected:
   int SliceID(const Layer* layer) const;
+
+ private:
   vector<Blob<float>> datavec_, gradvec_;
   int slice_dim_, slice_num_;
 };
 
 /**
- * Replciate this layer into multiple dst layers
- * TODO change name to ReplicateLayer.
+ * Connect the source layer with multiple dst layers.
+ * Pass source layer's data blob directly to dst layers.
+ * Aggregate dst layer's gradients into source layer's gradient.
  */
 class SplitLayer: public Layer {
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition();
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
-
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric* perf) override;
+  void ComputeGradient(Phase phase) override;
+  ConnectionType dst_layer_connection() const override {
+    return kOneToMany;
+  }
+ protected:
+  Blob<float> grads_;
 };
 
 /**
@@ -518,28 +430,21 @@ class SplitLayer: public Layer {
  */
 class LossLayer: public Layer{
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
-
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers)=0;
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers)=0;
+  using Layer::mutable_grad;
+  using Layer::grad;
+  using Layer::is_losslayer;
 
-  virtual Blob<float>* mutable_grad(const Layer* layer){
+  Blob<float>* mutable_grad(const Layer* layer) override {
     return nullptr;
   }
-  virtual const Blob<float>& grad(const Layer* from) const {
-    CHECK(false)<<"Loss layer has not gradient blob";
+  const Blob<float>& grad(const Layer* from) const override {
+    CHECK(false) << "Loss layer has not gradient blob";
     return grad_;
   }
-  virtual bool is_losslayer() const {
+  bool is_losslayer() const override {
     return true;
   }
-  virtual const Blob<float>& metric() const {
-    return metric_;
-  }
+
  protected:
   Blob<float> metric_;
 };
@@ -549,56 +454,30 @@ class LossLayer: public Layer{
  */
 class ParserLayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
+  using Layer::is_parserlayer;
+  using Layer::mutable_grad;
+  using Layer::grad;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers)=0;
+  void ComputeFeature(Phase phase, Metric* perf) override;
+  void ComputeGradient(Phase phase) override {};
   /**
    * Parse records from DataLayer into blob.
-   * This function is called by
-   * ComputeFeature(Phase, const vector<SLayer>& srclayers)  or Prefetch(Phase).
    */
   virtual void ParseRecords(Phase phase, const vector<Record>& records,
-      Blob<float>* blob)=0;
-
-  virtual bool is_parserlayer() const {
+      Blob<float>* blob) = 0;
+  bool is_parserlayer() const override {
     return true;
   }
-
-  virtual void ComputeFeature(Phase phase, const vector<SLayer>& srclayers);
-  /**
-   * Dummy function. ParserLayer does not compute gradients.
-   */
-  virtual void ComputeGradient(const vector<SLayer>& srclayers){};
-  virtual void Setup(){
-    Setup(layer_proto_,srclayers_);
-    has_setup_=true;
-  }
-  virtual void SetupAfterPartition(){
-    if(!has_setup_)
-      Setup();
-  }
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers){}
-
-  virtual PartitionType partition_type () const{
-    return kNone;
-  }
-  virtual Blob<float>* mutable_grad(const Layer* layer) {
+  Blob<float>* mutable_grad(const Layer* layer) override {
     return nullptr;
   }
-  virtual const Blob<float>& grad(const Layer* from) const {
-    CHECK(false)<<"Parser layer has not gradient blob";
+  const Blob<float>& grad(const Layer* from) const  override {
+    CHECK(false) << "Parser layer has not gradient blob";
     return grad_;
   }
-
- private:
-  bool has_setup_;
 };
-} // singa
+}  // namespace singa
 
-#endif // INCLUDE_BASE_LAYER_H_
+#endif  // SINGA_NEURALNET_BASE_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h
index 48cffa2..b678e63 100644
--- a/include/neuralnet/layer.h
+++ b/include/neuralnet/layer.h
@@ -1,5 +1,7 @@
-#ifndef INCLUDE_NET_LAYER_H_
-#define INCLUDE_NET_LAYER_H_
+#ifndef SINGA_NEURALNET_LAYER_H_
+#define SINGA_NEURALNET_LAYER_H_
+
+#include <lmdb.h>
 
 #include <vector>
 #include <string>
@@ -9,13 +11,11 @@
 #include <memory>
 #include <chrono>
 #include <random>
-#include <lmdb.h>
 
 #include "proto/model.pb.h"
 #include "utils/data_shard.h"
 #include "neuralnet/base_layer.h"
 
-
 /**
  * \file this file includes the declarations neuron layer classes that conduct
  * the transformation of features.
@@ -27,54 +27,39 @@ namespace singa {
  */
 class ConvolutionLayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
-  /**
-   * need to reset some properties (e.g., weight matrix) according to
-   * shapes (after partition, e.g., partition is done against channel dimension)
-   */
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
-
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
-  virtual vector<shared_ptr<Param>> GetParams() {
-    return vector<shared_ptr<Param>>{weight_, bias_};
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
+  const vector<Param*> GetParams() const override {
+    vector<Param*> params{weight_, bias_};
+    return params;
   }
-  virtual ConnectionType connection_type(int k) const {
-    CHECK_LT(k, srclayers_.size());
+  ConnectionType src_neuron_connection(int k) const  override {
+    // CHECK_LT(k, srclayers_.size());
     return kOneToAll;
   }
+  ~ConvolutionLayer();
+
  protected:
-  int kernel_, pad_,  stride_ ;
-  int batchsize_,  channels_, height_,width_;
+  int kernel_, pad_,  stride_;
+  int batchsize_,  channels_, height_, width_;
   int col_height_, col_width_, conv_height_, conv_width_, num_filters_;
-  shared_ptr<Param> weight_, bias_;
+  Param* weight_, *bias_;
   Blob<float> col_data_, col_grad_;
 };
 
 class DropoutLayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
-
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
  protected:
   // drop probability
   float pdrop_;
@@ -89,31 +74,23 @@ class DropoutLayer: public Layer {
   */
 class InnerProductLayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
 
-  /**
-   * need to reset weight matrix in case of LayerPartition
-   */
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
-  virtual ConnectionType connection_type(int k) const {
-    CHECK_LT(k, srclayers_.size());
+
+  ConnectionType src_neuron_connection(int k) const override {
+    // CHECK_LT(k, srclayers_.size());
     return kOneToAll;
   }
-
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
-  //virtual void ToProto(LayerProto *layer_proto, bool copyData);
-  virtual vector<shared_ptr<Param>> GetParams() {
-    return vector<shared_ptr<Param>>{weight_, bias_};
+  const vector<Param*> GetParams() const override {
+    vector<Param*> params{weight_, bias_};
+    return params;
   }
+  ~InnerProductLayer();
 
  private:
   //! dimension of the hidden layer
@@ -121,16 +98,16 @@ class InnerProductLayer: public Layer {
   //! dimension of the visible layer
   int vdim_;
   int batchsize_;
-  shared_ptr<Param> weight_, bias_;
+  Param* weight_, *bias_;
 };
 
 class LabelLayer: public ParserLayer {
  public:
-  using ParserLayer::Setup;
+  using ParserLayer::ParseRecords;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void ParseRecords(Phase phase, const vector<Record>& records,
-      Blob<float>* blob);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ParseRecords(Phase phase, const vector<Record>& records,
+      Blob<float>* blob) override;
 };
 
 class LRNLayer: public Layer {
@@ -142,22 +119,13 @@ class LRNLayer: public Layer {
  * a_i, the activation (after ReLU) of a neuron convolved with the i-th kernel.
  * b_i, the neuron after normalization, N is the total num of kernels
  */
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
- public:
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
-
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
-
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
  protected:
   //! shape of the bottom layer feature
   int batchsize_, channels_, height_, width_;
@@ -170,11 +138,11 @@ class LRNLayer: public Layer {
 
 class MnistLayer: public ParserLayer {
  public:
-  using Layer::Setup;
+  using ParserLayer::ParseRecords;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void ParseRecords(Phase phase, const vector<Record>& records,
-      Blob<float>* blob);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ParseRecords(Phase phase, const vector<Record>& records,
+      Blob<float>* blob) override;
 
  protected:
   // height and width of the image after deformation
@@ -182,47 +150,34 @@ class MnistLayer: public ParserLayer {
   // n^2 images are processed as a batch for elastic distortion
   // conv height and conv width
   // gauss kernel values, displacements, column image and tmp buffer
-  //float* gauss_, *displacementx_, *displacementy_, *colimg_, *tmpimg_;
+  // float* gauss_, *displacementx_, *displacementy_, *colimg_, *tmpimg_;
   float  gamma_, beta_, sigma_, kernel_, alpha_, norm_a_, norm_b_;
   int resize_, elastic_freq_;
 };
 
 class PoolingLayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
+
  protected:
   int kernel_, pad_, stride_;
-  int batchsize_,channels_, height_, width_, pooled_height_, pooled_width_;
+  int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_;
   PoolingProto_PoolMethod pool_;
 };
 
 class ReLULayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
-
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions = 1) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
 };
 
 
@@ -231,34 +186,26 @@ class SoftmaxLossLayer: public LossLayer {
    * connected from the label layer and the last fc layer
    */
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
 
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
   /**
    * softmax is not recommendeded for partition because it requires the whole
    * src layer for normalization.
    */
-  virtual PartitionType partition_type() const {
-    if(layer_proto_.partition_type()==kLayerPartition)
-      return kNone;
-    else
-      return layer_proto_.partition_type();
+  int partition_dim() const override {
+    CHECK_LE(layer_proto_.partition_dim(), 1);
+    return layer_proto_.partition_dim();
   }
-  virtual ConnectionType connection_type(int k) const {
-    CHECK_LT(k, srclayers_.size());
+  ConnectionType src_neuron_connection(int k) const override {
+    // CHECK_LT(k, srclayers_.size());
     return kOneToAll;
   }
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
  private:
   int batchsize_;
   int dim_;
@@ -268,11 +215,11 @@ class SoftmaxLossLayer: public LossLayer {
 
 class RGBImageLayer: public ParserLayer {
  public:
-  using Layer::Setup;
+  using ParserLayer::ParseRecords;
 
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual void ParseRecords(Phase phase, const vector<Record>& records,
-      Blob<float>* blob);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ParseRecords(Phase phase, const vector<Record>& records,
+      Blob<float>* blob) override;
 
  private:
   float scale_;
@@ -283,33 +230,21 @@ class RGBImageLayer: public ParserLayer {
 
 class ShardDataLayer: public DataLayer{
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
-  using Layer::ComputeGradient;
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){};
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
-  virtual int batchsize() const {
-    return layer_proto_.sharddata_conf().batchsize();
-  }
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
  private:
   shared_ptr<DataShard> shard_;
 };
 class LMDBDataLayer: public DataLayer{
  public:
-  using Layer::Setup;
   using Layer::ComputeFeature;
-  using Layer::ComputeGradient;
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){};
-  virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers);
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
   void ConvertDatumToSingleLableImageRecord(const Datum& datum,
     SingleLabelImageRecord* record);
-  virtual int batchsize() const {
-    return layer_proto_.lmdbdata_conf().batchsize();
-  }
  private:
   MDB_env* mdb_env_;
   MDB_dbi mdb_dbi_;
@@ -325,21 +260,13 @@ class LMDBDataLayer: public DataLayer{
  */
 class TanhLayer: public Layer {
  public:
-  using Layer::Setup;
-  using Layer::SetupAfterPartition;
   using Layer::ComputeFeature;
   using Layer::ComputeGradient;
 
-  virtual void Setup(const LayerProto& proto,
-      const vector<SLayer>& srclayers);
-
-  virtual void SetupAfterPartition(const LayerProto& proto,
-      const vector<int> &shape,
-      const vector<SLayer>& srclayers);
-
+  void Setup(const LayerProto& proto, int npartitions) override;
+  void ComputeFeature(Phase phase, Metric *perf) override;
+  void ComputeGradient(Phase phase) override;
 
-  virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers);
-  virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers);
  private:
   float outer_scale_, inner_scale_;
 };
@@ -347,4 +274,4 @@ class TanhLayer: public Layer {
 
 }  // namespace singa
 
-#endif  // INCLUDE_NET_LAYER_H_
+#endif  // SINGA_NEURALNET_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/neuralnet/neuralnet.h b/include/neuralnet/neuralnet.h
index ec6797c..2e19d0c 100644
--- a/include/neuralnet/neuralnet.h
+++ b/include/neuralnet/neuralnet.h
@@ -1,159 +1,119 @@
-#ifndef INCLUDE_NET_NET_H_
-#define INCLUDE_NET_NET_H_
+#ifndef SINGA_NEURALNET_NEURALNET_H_
+#define SINGA_NEURALNET_NEURALNET_H_
 
-#include <glog/logging.h>
 #include <vector>
 #include <map>
 #include <memory>
+#include <string>
 
 #include "proto/model.pb.h"
 #include "neuralnet/layer.h"
 #include "utils/factory.h"
 #include "utils/graph.h"
 
+namespace singa {
 using std::vector;
 using std::string;
 using std::map;
 using std::shared_ptr;
-namespace singa {
+
 /**
- * The neural network is constructed from user configured layers through google
- * protocol buffer. TODO support constructing neural network by adding layers
- * explicitly. E.g., users create layers and connect them manually in the code.
+ * The neural network is constructed from user configurations in NetProto.
  *
- * Some layers, e.g., SplitLayer and BridgeSrcLayer/BridgeDstLayer will be added
- * implicitly to partition the neural network.
+ * Some layers, e.g., SplitLayer and BridgeSrcLayer/BridgeDstLayer
+ * will be added implicitly to partition the neural network.
+ * TODO create wrappers for popular models, e.g., MLP, CNN.
  */
 class NeuralNet {
  public:
   /**
-   * Register Layers
+   * Register Layers, i.e., map layer type to layer class
    */
   static void RegisterLayers();
   /**
-   * Setup the neural network for training, test or validation.
+   * Create the neural network for training, test or validation.
    *
    * Parameters for test/validation net can share those from training after
    * setup (done outside of this funcion).
    *
-   * @param np proto for the neural network.
+   * @param np proto for the neural network
    * @param phase test/training/validation
-   * @param group_size partition the net among this num of workers
+   * @param num num of partitions, do partitioning if num > 1
+   * @return shared pointer to a neural net
    */
-  static shared_ptr<NeuralNet> SetupNeuralNet(const NetProto& np, Phase phase,
-      int group_size);
+  static shared_ptr<NeuralNet> Create(const NetProto& np, Phase phase, int num);
 
  public:
   /**
    * construct the net structure from protocol buffer.
+   * @param netproto neural net config
+   * @param npartitions num of partitions. 1 for no partitioning.
    */
-  NeuralNet(NetProto net_proto, int group_size=1);
+  explicit NeuralNet(NetProto netproto, int npartitions = 1);
+  ~NeuralNet();
   /**
-   * construct a json string representing the neuralnet graph.
-   * The json string can be used by other graph engine to draw a figure for
-   * displaying the neuralnet structure.
-   */
-  std::string ToString();
-  /**
-   * Print Norm1 of data and grad of each Layer and parameter.
-   * @param net, neural network
-   */
-  string DebugInfo();
-
-  /**
-   * to display the adjacency layers
+   * To display the adjacency layers
    */
   std::string ToAdjacency();
   /**
-   * Add layer explicitly used in manually programming/constructing neural net.
-   */
-  void AddLayer(const LayerProto &layer_proto){};
-  /**
-   * Add layer explicitly used in manually programming/constructing neural net.
-   */
-  void AddLayer(const Layer* layer){};
-  /**
-   * share weights from other neuralnet
+   * Share memory of parameter values from other neuralnet
    */
-  void ShareParams(shared_ptr<NeuralNet> other,int flag);
-  void ToProto(NetProto *net_proto, bool copyData=false);
-  const std::vector<shared_ptr<Layer>>& layers() {
+  void ShareParams(shared_ptr<NeuralNet> other);
+
+  const std::vector<Layer*>& layers() {
     return layers_;
   }
-  /**
-   * return ParserLayer of the neuralnet.
-   */
-  const std::vector<ParserLayer*>& parserlayers() {
-    if(parserlayers_.size()==0){
-      for(auto& layer: layers_)
-        if(layer->is_parserlayer())
-          parserlayers_.push_back(static_cast<ParserLayer*>(layer.get()));
-    }
+  const std::vector<ParserLayer*>& parserlayers() const {
     return parserlayers_;
   }
-  const std::vector<LossLayer*>& losslayers() {
-    if(losslayers_.size()==0){
-      for(auto& layer: layers_)
-        if(layer->is_losslayer())
-          losslayers_.push_back(static_cast<LossLayer*>(layer.get()));
-    }
+  const std::vector<LossLayer*>& losslayers() const {
     return losslayers_;
   }
-  const std::vector<DataLayer*>& datalayers() {
-    if(datalayers_.size()==0){
-      for(auto& layer: layers_)
-        if(layer->is_datalayer())
-          datalayers_.push_back(static_cast<DataLayer*>(layer.get()));
-    }
+  const std::vector<DataLayer*>& datalayers() const {
     return datalayers_;
   }
-  const std::vector<shared_ptr<Param>> &params()const {
+  const std::vector<Param*>& params() const {
     return params_;
   }
-  shared_ptr<Layer> name2layer(string name){
-    if (name2layer_.find(name)!=name2layer_.end())
-      return name2layer_[name];
-    else return nullptr;
+  Layer* name2layer(string name) const {
+    if (name2layer_.find(name) != name2layer_.end())
+      return name2layer_.at(name);
+    else
+      return nullptr;
   }
-
-  shared_ptr<Param> paramid2param(int id) {
-    if(paramid2param_.size()==0){
-      for(auto& layer: layers_){
-        for(shared_ptr<Param> p: layer->GetParams()){
-          paramid2param_[p->id()]=p;
-        }
-      }
-    }
-    return paramid2param_[id];
+  Param* paramid2param(int id) const {
+    return paramid2param_.at(id);
   }
 
  protected:
-  void ConstructNeuralNet(const NetProto &net_proto);
-  void PartitionNeuralNet();
-  map<string, shared_ptr<Layer>> GetNameToLayer(
-    const vector<shared_ptr<Layer>>& layers);
-  Graph CreatePartitonedGraph(const vector<shared_ptr<Layer>>& layers,
-    const map<string, shared_ptr<Layer>>& name2layer);
-
   /**
-   * Partition each layer according its partition type and dimension.
-   * @param layers original unpartitioned layers
+   * Create a neural net graph, one node for each layer.
+   *
+   * Partition the graph if npartitions > 1, each layer is sliced according to
+   * its own partition setting.
+   * @param netproto
+   * @npartitions
+   * @return neural net graph
+   */
+  Graph* CreateGraph(const NetProto& netproto, int npartitions);
+  /**
+   * Create neural net from graph, one layer per node.
+   */
+  void CreateNetFromGraph(Graph* graph, int npartitions);
+  /**
+   * prepare data structures, e.g., params_, layers_, etc.
    */
-  map<string, vector<shared_ptr<Layer>>> PartitionLayers(
-      const vector<shared_ptr<Layer>>& layers);
+  void PrepareDataStructures();
 
  protected:
-  vector<shared_ptr<Layer>> layers_;
+  vector<Layer*> layers_;
   vector<ParserLayer*> parserlayers_;
   vector<LossLayer*> losslayers_;
   vector<DataLayer*> datalayers_;
-  vector<shared_ptr<Param>> params_;
-  map<string, shared_ptr<Layer>> name2layer_;
-  map<int, shared_ptr<Param>> paramid2param_;
+  vector<Param*> params_;
 
-  map<string, LayerProto> name2layerproto_;
-  int group_size_;
-  Graph graph_;
+  map<string, Layer*> name2layer_;
+  map<int, Param*> paramid2param_;
 };
 }  // namespace singa
-#endif  // INCLUDE_NET_NET_H_
+#endif  // SINGA_NEURALNET_NEURALNET_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/trainer/server.h
----------------------------------------------------------------------
diff --git a/include/trainer/server.h b/include/trainer/server.h
index a8995fb..96a1437 100644
--- a/include/trainer/server.h
+++ b/include/trainer/server.h
@@ -9,7 +9,7 @@
 
 using std::shared_ptr;
 namespace singa {
-typedef std::unordered_map<int, shared_ptr<Param>> ServerShard;
+typedef std::unordered_map<int, Param*> ServerShard;
 /* Repsond to worker's get/put/udpate request, and periodically syncing with
   * other servers.
   *
@@ -24,6 +24,7 @@ class Server{
  public:
 
   Server(int thread_id, int group_id, int server_id);
+  virtual ~Server() {};
   void Setup(const UpdaterProto& proto, shared_ptr<ServerShard> shard,
       const vector<int>& slice2group);
   void Run();
@@ -41,14 +42,14 @@ class Server{
    *
    * @return the orignal message or response message
    */
-       virtual Msg* HandleGet(shared_ptr<Param> param, Msg** msg);
+       virtual Msg* HandleGet(Param* param, Msg** msg);
 
        /**
         * Process Update request.
    *
    * @return the orignal message or response message
    */
-       virtual Msg* HandleUpdate(shared_ptr<Param> param, Msg** msg);
+       virtual Msg* HandleUpdate(Param* param, Msg** msg);
 
        /**
         * Process PUT request.
@@ -61,7 +62,7 @@ class Server{
        /**
    * TODO Process SYNC request.
         */
-       virtual Msg* HandleSyncRequest(shared_ptr<Param> param, Msg** msg);
+       virtual Msg* HandleSyncRequest(Param* param, Msg** msg);
 
  protected:
   int thread_id_,group_id_, server_id_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/trainer/trainer.h
----------------------------------------------------------------------
diff --git a/include/trainer/trainer.h b/include/trainer/trainer.h
index c19a0ae..2419dc4 100644
--- a/include/trainer/trainer.h
+++ b/include/trainer/trainer.h
@@ -43,7 +43,7 @@ typedef struct HandleContext_{
   */
 class ParamInfo{
    public:
-  ParamInfo(shared_ptr<Param> p,int local, int owner):
+  ParamInfo(Param* p,int local, int owner):
     num_update(0), next_version(-1),num_local(local), num_total(1),
     owner_procs(owner){
       shares.push_back(p);
@@ -57,7 +57,7 @@ class ParamInfo{
     *  otherwise
     * @param owner the procs id of the worker who ownes this Param object
     */
-  void AddParam(shared_ptr<Param> p, bool local){
+  void AddParam(Param* p, bool local){
     num_local+=local;
     num_total+=1;
     if(local)
@@ -68,7 +68,7 @@ class ParamInfo{
   int num_local; //!< # local workers uses the shared parameter
   int num_total; //!< # total workers uses the shared parameter
   int owner_procs; //!< the procs id of the worker that owns the parameter
-  vector<shared_ptr<Param>> shares;
+  vector<Param*> shares;
 };
 
 typedef std::map<int, shared_ptr<ParamInfo>> WorkerShard;
@@ -95,13 +95,12 @@ class Trainer{
   // point.
 
  protected:
-  vector<shared_ptr<Server>> CreateServers(int nthread, const ModelProto& mproto,
+  vector<Server*> CreateServers(int nthread, const ModelProto& mproto,
       const vector<int> slices, vector<HandleContext*>* ctx);
-  vector<shared_ptr<Worker>> CreateWorkers(int nthread,
-      const ModelProto& mproto, vector<int> *slice_size);
+  vector<Worker*> CreateWorkers(int nthread, const ModelProto& mproto,
+      vector<int> *slice_size);
 
-  void Run(const vector<shared_ptr<Worker>>& workers,
-      const vector<shared_ptr<Server>>& servers);
+  void Run(const vector<Worker*>& workers, const vector<Server*>& servers);
   /**
    * Register default implementations for all base classes used in the system,
    * e.g., the Updater, BaseMsg, etc.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/trainer/worker.h
----------------------------------------------------------------------
diff --git a/include/trainer/worker.h b/include/trainer/worker.h
index 04a68ea..3283ee9 100644
--- a/include/trainer/worker.h
+++ b/include/trainer/worker.h
@@ -19,7 +19,7 @@ const int kCollectSleepTime=5;//milliseconds;
 class Worker {
  public:
   Worker(int thread_id, int group_id, int worker_id);
-  ~Worker(){}
+  virtual ~Worker(){}
   void Setup(const ModelProto& model, shared_ptr<NeuralNet> train_net);
   void set_test_net(shared_ptr<NeuralNet> test_net){
     test_net_=test_net;
@@ -29,10 +29,10 @@ class Worker {
   }
 
   void Stop();
-  int Put(shared_ptr<Param> param, int step);
-  int Get(shared_ptr<Param> param, int step);
-  int Update(shared_ptr<Param> param, int step);
-  int Collect(shared_ptr<Param> param, int step);
+  int Put(Param* param, int step);
+  int Get(Param* param, int step);
+  int Update(Param* param, int step);
+  int Collect(Param* param, int step);
   int CollectAll(shared_ptr<NeuralNet> net, int step);
   /**
     * check validation/test firstly, then TrainOneBatch
@@ -49,7 +49,8 @@ class Worker {
   /**
    * Test/validate one mini-batch.
    */
-  virtual void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net, Metric* perf)=0;
+  virtual void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,
+      Metric* perf)=0;
   /**
     * Test the perforance of the learned model on validation or test dataset.
     * Test is done by the first group.
@@ -77,7 +78,7 @@ class Worker {
   const bool DisplayDebugInfo(const int step) const {
     return DisplayNow(step)&&modelproto_.debug()&&group_id_==0;
   }
-  const void DisplayPerformance(const Metric & perf, const string& prefix);
+  void DisplayPerformance(const string& prefix, const Metric & perf);
 
   /**
    * return true if the stop condition is satisfied, e.g., the maximum number
@@ -142,9 +143,11 @@ class BPWorker: public Worker{
  public:
   BPWorker(int thread_id, int group_id, int worker_id);
   ~BPWorker(){}
-  virtual void TrainOneBatch(int step, Metric* perf);
-  virtual void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net, Metric* perf);
-  void Forward(int step, Phase phase, shared_ptr<NeuralNet> net);
+  void TrainOneBatch(int step, Metric* perf) override;
+  void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,
+      Metric* perf) override;
+
+  void Forward(int step, Phase phase, shared_ptr<NeuralNet> net, Metric* perf);
   void Backward(int step, shared_ptr<NeuralNet> net);
 };
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/utils/common.h
----------------------------------------------------------------------
diff --git a/include/utils/common.h b/include/utils/common.h
index 619e06a..022a1dd 100644
--- a/include/utils/common.h
+++ b/include/utils/common.h
@@ -3,10 +3,11 @@
 
 #include <google/protobuf/message.h>
 #include <stdlib.h>
-#include <map>
+#include <unordered_map>
 #include <sstream>
 #include <string>
 #include <vector>
+#include "proto/common.pb.h"
 
 namespace singa {
 
@@ -40,54 +41,39 @@ inline float rand_real() {
 const std::string GetHostIP();
 void SetupLog(const std::string& workspace, const std::string& model);
 
+/**
+ * Performance mtrics.
+ */
 class Metric {
  public:
-  Metric() : counter_(0) {}
-  inline void AddMetric(const std::string& name, float value) {
-    std::string prefix = name;
-    if (name.find("@") != std::string::npos)
-      prefix = name.substr(0, name.find("@"));
-    if (data_.find(prefix) == data_.end())
-      data_[prefix] = value;
-    else
-      data_[prefix] += value;
-  }
-  inline void AddMetrics(const Metric& other) {
-    for (auto& entry : other.data_)
-      AddMetric(entry.first, entry.second);
-  }
-  inline void Reset() {
-    data_.clear();
-    counter_ = 0;
-  }
-  inline void Inc() { ++counter_; }
-  inline std::string ToString() const {
-    std::string disp = std::to_string(data_.size()) + " fields, ";
-    for (const auto& entry : data_) {
-      disp += entry.first + " : " + std::to_string(entry.second / counter_)
-              + "\t";
-    }
-    return disp;
-  }
-  inline void ParseString(const std::string& perf) {
-    std::stringstream stream(perf);
-    int n;
-    std::string str;
-    stream >> n >> str;
-    for (int i = 0; i < n; ++i) {
-      float f;
-      std::string sep;
-      stream >> str >> sep >> f;
-      data_[str] = f;
-    }
-    counter_ = 1;
-  }
-
+  /**
+   * Add one metric.
+   *
+   * If the metric exists, aggregate the value. Otherwise, create a new entry for it.
+   *
+   * @param name metric name, e.g., 'loss'
+   * @param value metric value
+   */
+  void Add(const std::string& name, float value);
+  /**
+   * Reset all metric counters and values to 0.
+   */
+  void Reset();
+  /**
+   * Generate a one-line string for logging.
+   */
+  const std::string ToLogString() const;
+  /**
+   * Serialize the object into a string
+   */
+  const std::string ToString() const;
+  /**
+   * Parse the metric from a string
+   */
+  void ParseFrom(const std::string& msg);
  private:
-  std::map<std::string, float> data_;
-  int counter_;
+  std::unordered_map<std::string, std::pair<int, float>> entry_;
 };
-
 }  // namespace singa
 
 #endif  // SINGA_UTILS_COMMON_H_
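
As a quick illustration (not part of the commit), the refactored Metric class above might be used roughly as follows; the metric names are made up.

    #include <iostream>
    #include <string>
    #include "utils/common.h"

    int main() {
      singa::Metric perf;
      perf.Add("loss", 2.30f);       // new entry
      perf.Add("loss", 2.10f);       // same name: aggregated internally
      perf.Add("accuracy", 0.12f);

      std::cout << perf.ToLogString() << std::endl;  // one-line summary for logs

      std::string msg = perf.ToString();  // serialize, e.g., to send to the server
      singa::Metric received;
      received.ParseFrom(msg);            // reconstruct on the receiving side

      perf.Reset();                       // clear all counters and values
      return 0;
    }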

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/utils/graph.h
----------------------------------------------------------------------
diff --git a/include/utils/graph.h b/include/utils/graph.h
index 93348dd..04e122d 100644
--- a/include/utils/graph.h
+++ b/include/utils/graph.h
@@ -1,150 +1,101 @@
-#ifndef INCLUDE_UTILS_GRAPH_H_
-#define INCLUDE_UTILS_GRAPH_H_
-#include <glog/logging.h>
+#ifndef SINGA_UTILS_GRAPH_H_
+#define SINGA_UTILS_GRAPH_H_
 #include <vector>
 #include <string>
 #include <map>
 #include <stack>
 #include <memory>
 
+namespace singa {
 using std::vector;
 using std::string;
 using std::map;
-using std::pair;
-using std::shared_ptr;
-using std::make_shared;
 
-
-typedef struct _LayerInfo{
-  // origin identifies the origin of this node, i.e., the corresponding layer
-  string origin;
-  //int locationid;// locationidation id;
-  int partitionid;
-  int slice_dimension;
-  int concate_dimension;
-}LayerInfo;
-typedef LayerInfo V;
-
-
-class Node;
-typedef shared_ptr<Node> SNode;
-
-class Node{
+class Node {
  public:
-  typedef shared_ptr<Node> SNode;
-  Node(string name): name_(name){}
-  Node(string name, const V& v):
-    name_(name), val_(v){}
+  /**
+   * Node constructor.
+   *
+   * @param name name of the corresponding layer
+   */
+  explicit Node(string name);
+  /**
+   * Node constructor.
+   *
+   * A node created by this constructor represents one partition of an original node.
+   * @param name node name
+   * @param origin  name of the original node
+   * @param id partition id of this node
+   * @param proto conf of the corresponding layer
+   */
+  Node(const string& name, const string& origin, int id, void* proto);
+  ~Node();
+  void AddDstNode(Node* dstnode);
+  void AddSrcNode(Node* srcnode);
+  void RemoveDstNode(Node* dst);
+  void RemoveSrcNode(Node* src);
 
-  void AddDstNode(SNode dstnode){
-    dstnodes_.push_back(dstnode);
-  }
-  void AddSrcNode(SNode srcnode){
-    srcnodes_.push_back(srcnode);
-  }
-
-  void RemoveDstNode(SNode dst){
-    auto iter=dstnodes_.begin();
-    while((*iter)->name_!=dst->name_&&iter!=dstnodes_.end()) iter++;
-    CHECK((*iter)->name_==dst->name_);
-    dstnodes_.erase(iter);
-  }
-  void RemoveSrcNode(SNode src){
-    auto iter=srcnodes_.begin();
-    while((*iter)->name_!=src->name_&&iter!=srcnodes_.end()) iter++;
-    CHECK((*iter)->name_==src->name_);
-    srcnodes_.erase(iter);
-  }
-  const string& name() const {return name_;}
-  const V& val() const {return val_;}
-  const SNode srcnodes(int k) const {return srcnodes_[k]; }
-  const SNode dstnodes(int k) const {return dstnodes_[k]; }
-  const vector<SNode>& srcnodes() const {return srcnodes_; }
-  const vector<SNode>& dstnodes() const {return dstnodes_; }
-  int  dstnodes_size() const {return dstnodes_.size(); }
-  int  srcnodes_size() const {return srcnodes_.size(); }
-
- private:
-  string name_;
-  vector<SNode> srcnodes_;
-  vector<SNode> dstnodes_;
+ public:
+  string name;
+  //! name of the origin node/layer from which this node is derived
+  string origin;
+  //! partition id
+  int partition_id;
+  //! proto of the corresponding layer
+  void* proto;
 
-  V val_;
-    // properties
-  string color_, weight_, shape_;
+  vector<Node*> srcnodes;
+  vector<Node*> dstnodes;
 };
 
-
 /**
- * For partition neuralnet and displaying the neuralnet structure
+ * The neural net is constructed by first creating a graph with one node per
+ * layer. After a topological sort of the graph nodes, layers are created and
+ * connected.
  */
-class Graph{
+class Graph {
  public:
-  Graph(){}
-  void Sort();
-  const SNode& AddNode(string name, V origin){
-    nodes_.push_back(make_shared<Node>(name, origin));
-    name2node_[name]=nodes_.back();
-    return nodes_.back();
-  }
-  const SNode& AddNode(string name){
-    nodes_.push_back(make_shared<Node>(name));
-    name2node_[name]=nodes_.back();
-    return nodes_.back();
-  }
-
-  void AddEdge(SNode srcnode, SNode dstnode){
-    srcnode->AddDstNode(dstnode);
-    dstnode->AddSrcNode(srcnode);
-  }
-
-  void AddEdge(const string& src, const string& dst){
-    CHECK(name2node_.find(src)!=name2node_.end())<<"can't find src node "<<src;
-    CHECK(name2node_.find(dst)!=name2node_.end())<<"can't find dst node "<<dst;
-
-    SNode srcnode=name2node_[src], dstnode=name2node_[dst];
-    AddEdge(srcnode, dstnode);
-  }
-
-  void RemoveEdge(const string &src, const string& dst){
-    CHECK(name2node_.find(src)!=name2node_.end())<<"can't find src node "<<src;
-    CHECK(name2node_.find(dst)!=name2node_.end())<<"can't find dst node "<<dst;
-
-    SNode srcnode=name2node_[src], dstnode=name2node_[dst];
-    RemoveEdge(srcnode, dstnode);
-  }
-
-  void RemoveEdge(SNode src, SNode dst){
-    src->RemoveDstNode(dst);
-    dst->RemoveSrcNode(src);
-  }
-
-  const vector<SNode>& nodes() const{
+  Graph() {}
+  ~Graph();
+  /**
+   * @return all nodes of the graph
+   */
+  const vector<Node*>& nodes() const {
     return nodes_;
-  };
-
-  const SNode& node(string name) const{
-    CHECK(name2node_.find(name)!= name2node_.end())
-      <<"can't find dst node "<<name;
+  }
+  /**
+   * @param name node name
+   * @return the node with the given name
+   */
+  Node* node(const string& name) const {
     return name2node_.at(name);
   }
 
-  const string ToString() const;
-  const string ToString(const map<string, string>& info) const ;
-
-  bool Check() const;
-
-  SNode InsertSliceNode(SNode srcnode, const vector<SNode>& dstnodes,
-      const V& info, bool connect_dst=true);
-  SNode InsertConcateNode(const vector<SNode>&srcnodes, SNode dstnode,
-      const V& info);
-  SNode InsertSplitNode(SNode srcnode, const vector<SNode>& dstnodes);
-  std::pair<SNode, SNode> InsertBridgeNode(SNode srcnode, SNode dstnode);
-  void topology_sort_inner(SNode node, map<string, bool> *visited,
-    std::stack<string> *stack);
+  void AddNode(Node* node);
+  Node* AddNode(const string& name);
+  void AddEdge(Node* srcnode, Node* dstnode);
+  void AddEdge(const string& src, const string& dst);
+  void RemoveEdge(Node* src, Node* dst);
+  void RemoveEdge(const string &src, const string& dst);
+  /**
+   * Dump the graph into a JSON string, which can be used to draw a picture
+   * of the net structure, e.g., by graphviz
+   */
+  const string ToJson() const;
+  /**
+   * \copybrief ToJson()
+   *
+   * @param info info associated with each node
+   */
+  const string ToJson(const map<string, string>& info) const;
+  /**
+   * Do a topological sort of all nodes in the graph.
+   */
+  void Sort();
 
  private:
-  vector<SNode> nodes_;
-  map<string, SNode> name2node_;
+  vector<Node*> nodes_;
+  map<string, Node*> name2node_;
 };
-#endif // INCLUDE_UTILS_GRAPH_H_
+}  // namespace singa
+#endif  // SINGA_UTILS_GRAPH_H_
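
For illustration (not part of the commit), a minimal sketch of how net-construction code might drive the simplified Graph API above; the layer names are hypothetical.

    #include <iostream>
    #include "utils/graph.h"

    int main() {
      singa::Graph graph;
      // One node per layer, as CreateGraph does for the net configuration.
      graph.AddNode("data");
      graph.AddNode("hidden");
      graph.AddNode("softmax");
      graph.AddEdge("data", "hidden");
      graph.AddEdge("hidden", "softmax");

      graph.Sort();  // topological order used when creating/connecting layers
      for (singa::Node* node : graph.nodes())
        std::cout << node->name << std::endl;

      std::cout << graph.ToJson() << std::endl;  // dump for visualization
      return 0;
    }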

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/utils/param.h
----------------------------------------------------------------------
diff --git a/include/utils/param.h b/include/utils/param.h
index 61e862b..781fdb6 100644
--- a/include/utils/param.h
+++ b/include/utils/param.h
@@ -115,7 +115,7 @@ class Param {
    *
    * @param other the Param object whose owner owns the data blob
    */
-  void ShareData(shared_ptr<Param> other){
+  void ShareData(Param* other){
     proto_.set_owner(other->owner());
     if(data_!=nullptr)
       CHECK(std::equal(data_->shape().begin(), data_->shape().end(),

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/include/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/utils/updater.h b/include/utils/updater.h
index 2a6dd43..0d408d8 100644
--- a/include/utils/updater.h
+++ b/include/utils/updater.h
@@ -12,7 +12,7 @@ class Updater{
   virtual void Init(const UpdaterProto &proto){
     proto_=proto;
   }
-  virtual void Update(int step, shared_ptr<Param> param, float grad_scale=1.0f)=0;
+  virtual void Update(int step, Param* param, float grad_scale=1.0f)=0;
 
   float GetLearningRate(int step);
  protected:
@@ -21,7 +21,7 @@ class Updater{
 class SGDUpdater : public Updater{
  public:
   virtual void Init(const UpdaterProto& proto);
-  virtual void Update(int step, shared_ptr<Param> param, float grad_scale=1.0f);
+  virtual void Update(int step, Param* param, float grad_scale=1.0f);
 
  protected:
   float base_lr_;
@@ -31,7 +31,7 @@ class SGDUpdater : public Updater{
 class NesterovUpdater : public Updater{
  public:
   virtual void Init(const UpdaterProto& proto);
-  virtual void Update(int step, shared_ptr<Param> param, float grad_scale=1.0f);
+  virtual void Update(int step, Param* param, float grad_scale=1.0f);
 
  protected:
   float base_lr_;
@@ -41,7 +41,7 @@ class NesterovUpdater : public Updater{
 class AdaGradUpdater : public Updater{
  public:
   virtual void Init(const UpdaterProto& proto);
-  virtual void Update(int step, shared_ptr<Param> param, float grad_scale=1.0f);
+  virtual void Update(int step, Param* param, float grad_scale=1.0f);
 
  protected:
   float base_lr_;
@@ -52,7 +52,7 @@ class AdaGradUpdater : public Updater{
 class RMSPropUpdater : public Updater{
  public:
   virtual void Init(const UpdaterProto& proto);
-  virtual void Update(int step, shared_ptr<Param> param, float grad_scale=1.0f);
+  virtual void Update(int step, Param* param, float grad_scale=1.0f);
 
  protected:
   float base_lr_;
@@ -65,7 +65,7 @@ class RMSPropUpdater : public Updater{
 class AdaDeltaUpdater : public Updater{
  public:
   virtual void Init(const UpdaterProto& proto);
-  virtual void Update(int step, shared_ptr<Param> param, float grad_scale=1.0f);
+  virtual void Update(int step, Param* param, float grad_scale=1.0f);
 
  protected:
   float rho_;
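
For reference, a minimal sketch (not part of the commit) of a custom updater written against the Param*-based interface above; the class name is hypothetical and the body only logs the call rather than implementing a real update rule.

    #include <glog/logging.h>
    #include "utils/updater.h"
    #include "utils/param.h"

    namespace singa {
    class LoggingUpdater : public Updater {
     public:
      void Init(const UpdaterProto& proto) override {
        Updater::Init(proto);  // stores the configuration in proto_
      }
      void Update(int step, Param* param, float grad_scale = 1.0f) override {
        // A real updater would adjust param's data blob using its gradient here.
        LOG(INFO) << "step " << step << ": would update param " << param->name()
                  << " with lr " << GetLearningRate(step)
                  << " and grad_scale " << grad_scale;
      }
    };
    }  // namespace singa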

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9a6e09fa/src/neuralnet/base_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/base_layer.cc b/src/neuralnet/base_layer.cc
index 95628cb..e5fd822 100644
--- a/src/neuralnet/base_layer.cc
+++ b/src/neuralnet/base_layer.cc
@@ -9,76 +9,46 @@
 #include "neuralnet/base_layer.h"
 
 namespace singa {
-/********* Implementation for Layer **************/
-void Layer::Init(const LayerProto &proto) {
-  layer_proto_=proto;
-}
-
-void Layer::Init(const Layer& other, const vector<int>& shape){
-  data_.Reshape(shape);
-  grad_.Reshape(shape);
-  layer_proto_=other.layer_proto_;
-}
-void Layer::Setup(){
-  Setup(layer_proto_, srclayers_);
-}
-void Layer::SetupAfterPartition(){
-  vector<int> shape=data_.shape();
-  SetupAfterPartition(layer_proto_, shape, srclayers_);
-  //LOG(ERROR)<<name()<<":"<<IntVecToString(shape_);
-  CHECK(std::equal(shape.begin(), shape.end(), data_.shape().begin()))<<name()
-    <<IntVecToString(shape)<<"--"<<IntVecToString(data_.shape());
-}
-void Layer::ComputeFeature(Phase phase){
-  ComputeFeature(phase, srclayers_);
-}
-void Layer::ComputeGradient(){
-  ComputeGradient(srclayers_);
-}
-
-void Layer::ToProto(LayerProto *proto, bool copyData) {
-}
-
-/********* Implementation for BridgeSrcLayer **************/
-void BridgeSrcLayer::Setup(const LayerProto& proto,
-    const vector<SLayer>& srclayers){
-  CHECK_EQ(srclayers.size(),1);
-  data_.Reshape(srclayers[0]->data(this).shape());
-  grad_.ReshapeLike(data_);
-}
-void BridgeSrcLayer::SetupAfterPartition(){
-  Setup(layer_proto_, srclayers_);
-  //LOG(ERROR)<<name()<<":"<<IntVecToString(shape_);
-}
 
-void BridgeSrcLayer::ComputeFeature(Phase phase,
-    const vector<SLayer>& srclayers){
-}
-void BridgeSrcLayer::ComputeGradient(const vector<SLayer>& srclayers){
+void Layer::Setup(const LayerProto& proto, int npartitions) {
+  CHECK_GE(npartitions, 1);
+  layer_proto_ = proto;
+}
+
+const string Layer::DebugString(int step, Phase phase) {
+  string ret =StringPrintf("Layer %10s ", name().c_str());
+  if(data_.count() != 0)
+    return ret;
+  if(phase == kForward) {
+    ret += StringPrintf("data %10s data norm1 %13.9f", data_.asum_data());
+  }else if(phase == kBackward) {
+    ret += StringPrintf("grad norm1 %13.9f\n", grad_.asum_data());
+    for(Param* p: GetParams())
+      ret += StringPrintf("param id %2d, name %10s,\
+          value norm1 %13.9f, grad norm1 %13.9f\n",
+          p->id(), p->name().c_str(),
+          p->data().asum_data(), p->grad().asum_data());
+  }
+  return ret;
 }
-
 /********* Implementation for BridgeDstLayer **************/
-void BridgeDstLayer::Setup(const LayerProto& proto,
-    const vector<SLayer>& srclayers){
-  CHECK_EQ(srclayers.size(),1);
-  data_.Reshape(srclayers[0]->data(this).shape());
+void BridgeDstLayer::Setup(const LayerProto& proto, int npartitions) {
+  Layer::Setup(proto, npartitions);
+  CHECK_EQ(srclayers_.size(),1);
+  data_.Reshape(srclayers_[0]->data(this).shape());
   grad_.ReshapeLike(data_);
 }
-void BridgeDstLayer::SetupAfterPartition(){
-  Setup(layer_proto_, srclayers_);
-  //LOG(ERROR)<<name()<<":"<<IntVecToString(shape_);
-}
-
 
 /************* Implementation for ConcateLayer ***********/
-void ConcateLayer::Setup(const LayerProto& proto,
-    const vector<SLayer>& srclayers){
-  size_t concate_dim=proto.concate_conf().concate_dimension();
+void ConcateLayer::Setup(const LayerProto& proto, int npartitions) {
+  // CHECK_EQ(npartitions, 1);
+  Layer::Setup(proto, npartitions);
+  size_t concate_dim=proto.concate_conf().concate_dim();
   CHECK_GE(concate_dim,0);
-  CHECK_GT(srclayers.size(),1);
-  vector<int> shape=srclayers[0]->data(this).shape();
-  for(size_t i=1;i<srclayers.size();i++){
-    const vector<int>& srcshape=srclayers[i]->data(this).shape();
+  CHECK_GT(srclayers_.size(),1);
+  vector<int> shape=srclayers_[0]->data(this).shape();
+  for(size_t i=1;i<srclayers_.size();i++){
+    const vector<int>& srcshape=srclayers_[i]->data(this).shape();
     for(size_t j=0;j<shape.size();j++)
       if(j==concate_dim)
         shape[j]+=srcshape[j];
@@ -89,19 +59,18 @@ void ConcateLayer::Setup(const LayerProto& proto,
   grad_.Reshape(shape);
 }
 
-void ConcateLayer::SetupAfterPartition(){
-  Setup(layer_proto_, srclayers_);
-//  LOG(ERROR)<<name()<<":"<<IntVecToString(shape_);
+void ConcateLayer::ComputeFeature(Phase phase, Metric *perf){
+  LOG(FATAL) << "Not implemented for Concate Layer";
 }
 
-void ConcateLayer::ComputeFeature(Phase phase, const vector<SLayer>& srclayers){}
-
-void ConcateLayer::ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){}
+void ConcateLayer::ComputeGradient(Phase phase){
+  LOG(FATAL) << "Not implemented for Concate Layer";
+}
 
 /************* Implementation for ParserLayer ***********/
-void ParserLayer::ComputeFeature(Phase phase, const vector<SLayer>& srclayers){
-  CHECK_EQ(srclayers.size(),1);
-  auto datalayer=static_cast<DataLayer*>(srclayers.begin()->get());
+void ParserLayer::ComputeFeature(Phase phase, Metric *perf){
+  CHECK_EQ(srclayers_.size(),1);
+  auto datalayer=static_cast<DataLayer*>(*srclayers_.begin());
   ParseRecords(phase, datalayer->records(), &data_);
 }
 
@@ -109,12 +78,11 @@ void ParserLayer::ComputeFeature(Phase phase, const vector<SLayer>& srclayers){
 void PrefetchLayer::Prefetch(Phase phase){
   //clock_t s=clock();
   for(auto layer: sublayers_)
-    layer->ComputeFeature(phase);
+    layer->ComputeFeature(phase, nullptr);
   //LOG(ERROR)<<(clock()-s)*1.0/CLOCKS_PER_SEC;
 }
 
-void PrefetchLayer::ComputeFeature(Phase phase,
-    const vector<SLayer>& srclayers){
+void PrefetchLayer::ComputeFeature(Phase phase, Metric* perf){
   if(thread_.joinable())
     thread_.join();
   else{
@@ -128,27 +96,27 @@ void PrefetchLayer::ComputeFeature(Phase phase,
   thread_=std::thread(&PrefetchLayer::Prefetch, this, phase);
 }
 
-void PrefetchLayer::Setup(const LayerProto& proto,
-    const vector<SLayer>& srclayers){
+void PrefetchLayer::Setup(const LayerProto& proto, int npartitions) {
+  Layer::Setup(proto, npartitions);
+  // CHECK_EQ(npartitions, 1);
   Factory<Layer>* factory=Singleton<Factory<Layer>>::Instance();
   const auto& sublayers=proto.prefetch_conf().sublayers();
   CHECK_GE(sublayers.size(), 1);
-  map<string, SLayer> layers;
+  map<string, Layer*> layers;
   for(auto const &p:sublayers){
-    auto layer=shared_ptr<Layer>(factory->Create(p.type()));
-    layer->Init(p);
+    auto layer=factory->Create(p.type());
     sublayers_.push_back(layer);
     layers[p.name()]= layer;
   }
   // TODO topology sort layers
   auto layer=sublayers_.begin();
-  for(auto const &p:sublayers){
-    std::vector<SLayer> src;
+  for(auto const &p : sublayers){
+    std::vector<Layer*> src;
     for(auto const &srcname: p.srclayers()){
       src.push_back(layers[srcname]);
-      (*layer)->AddSrcLayer(layers[srcname]);
+      (*layer)->add_srclayer(layers[srcname]);
     }
-    (*layer)->Setup(p, src);
+    (*layer)->Setup(p);
     layer++;
   }
   for(auto layer: sublayers_)
@@ -177,15 +145,18 @@ Blob<float>* PrefetchLayer::mutable_data(const Layer* from) {
 PrefetchLayer::~PrefetchLayer(){
   if(thread_.joinable())
     thread_.join();
+  for(auto layer : sublayers_)
+    delete layer;
 }
 /************* Implementation for SliceLayer****************/
-void SliceLayer::Setup(const LayerProto& proto,
-    const vector<SLayer>& srclayers){
-  slice_dim_=proto.slice_conf().slice_dimension();
-  slice_num_=proto.slice_conf().slice_num();
+void SliceLayer::Setup(const LayerProto& proto, int npartitions){
+  // CHECK_EQ(npartitions, 1);
+  Layer::Setup(proto, npartitions);
+  slice_dim_=proto.slice_conf().slice_dim();
+  slice_num_= npartitions;
   CHECK_GE(slice_dim_,0);
   CHECK_EQ(slice_num_, dstlayers_.size());
-  data_.Reshape(srclayers[0]->data(this).shape());
+  data_.Reshape(srclayers_[0]->data(this).shape());
   grad_.ReshapeLike(data_);
   datavec_.resize(slice_num_);
   gradvec_.resize(slice_num_);
@@ -201,17 +172,11 @@ void SliceLayer::Setup(const LayerProto& proto,
   }
 }
 
-void SliceLayer::SetupAfterPartition(){
-  Setup(layer_proto_, srclayers_);
-  //LOG(ERROR)<<name()<<":"<<IntVecToString(shape_);
-}
-
-
 int SliceLayer::SliceID(const Layer* layer) const {
   CHECK(layer!= nullptr);
   for(size_t i=0;i<datavec_.size();i++){
     //LOG(ERROR)<<"get slice "<<IntVecToString(shapes_[i]);
-    if(dstlayers_[i].get() == layer)
+    if(dstlayers_[i] == layer)
       return i;
   }
   CHECK(false);
@@ -238,11 +203,10 @@ Blob<float>* SliceLayer::mutable_grad(const Layer* layer){
     return &grad_;
   return &gradvec_[SliceID(layer)];
 }
-void SliceLayer::ComputeFeature(Phase phase,
-    const vector<shared_ptr<Layer>>& srclayers){
-  CHECK_EQ(srclayers.size(),1);
+void SliceLayer::ComputeFeature(Phase phase, Metric *perf) {
+  CHECK_EQ(srclayers_.size(),1);
   if(slice_dim_==0){
-    const auto& blob=srclayers.at(0)->data(this);
+    const auto& blob=srclayers_.at(0)->data(this);
     int size=blob.count()/slice_num_;
     for(int i=0;i<slice_num_;i++){
       float* dst=datavec_[i].mutable_cpu_data();
@@ -251,27 +215,26 @@ void SliceLayer::ComputeFeature(Phase phase,
     }
   }
 }
-void SliceLayer::ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){
-
-}
-
-void SplitLayer::Setup(const LayerProto& proto,
-    const vector<SLayer>& srclayers){
-  CHECK_EQ(srclayers.size(),1);
-  data_.Reshape(srclayers[0]->data(this).shape());
-  grad_.Reshape(srclayers[0]->data(this).shape());
+void SliceLayer::ComputeGradient(Phase phase) {
+  // LOG(FATAL) << "Not implemented";
 }
 
 /************* Implementation for SplitLayer****************/
-void SplitLayer::SetupAfterPartition(){
-  Setup(layer_proto_, srclayers_);
-  //LOG(ERROR)<<name()<<":"<<IntVecToString(shape_);
-}
-void SplitLayer::ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers){
+void SplitLayer::Setup(const LayerProto& proto, int npartitions) {
+  // CHECK_EQ(npartitions, 1);
+  Layer::Setup(proto, npartitions);
 
+  CHECK_EQ(srclayers_.size(),1);
+  data_.Reshape(srclayers_[0]->data(this).shape());
+  grad_.Reshape(srclayers_[0]->data(this).shape());
 }
-void SplitLayer::ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){
 
+void SplitLayer::ComputeFeature(Phase phase, Metric *perf) {
+  LOG(FATAL) << "Not implemented";
+
+}
+void SplitLayer::ComputeGradient(Phase phase) {
+  LOG(FATAL) << "Not implemented";
 }
 
 }  // namespace singa
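
To illustrate the refactored layer interface above (not part of the commit): a user-defined layer now implements Setup(proto, npartitions), ComputeFeature(phase, perf) and ComputeGradient(phase), reading its sources from srclayers_ instead of receiving them as arguments. The class name is hypothetical, and the Blob accessors (cpu_data/mutable_cpu_data/count) are assumed from the Blob utility.

    #include <cstring>
    #include <glog/logging.h>
    #include "neuralnet/base_layer.h"

    namespace singa {
    // Hypothetical pass-through layer: copies its single source blob unchanged.
    class IdentityLayer : public Layer {
     public:
      void Setup(const LayerProto& proto, int npartitions) override {
        Layer::Setup(proto, npartitions);
        CHECK_EQ(srclayers_.size(), 1);
        data_.Reshape(srclayers_[0]->data(this).shape());
        grad_.ReshapeLike(data_);
      }
      void ComputeFeature(Phase phase, Metric* perf) override {
        const auto& src = srclayers_[0]->data(this);
        memcpy(data_.mutable_cpu_data(), src.cpu_data(),
               src.count() * sizeof(float));
      }
      void ComputeGradient(Phase phase) override {
        // Propagate this layer's gradient back to the (single) source layer.
        memcpy(srclayers_[0]->mutable_grad(this)->mutable_cpu_data(),
               grad_.cpu_data(), grad_.count() * sizeof(float));
      }
    };
    }  // namespace singa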
