Streamline tensor.h by moving member functions into the corresponding cpp or cuda files. Remove the shape_multipliers_ attribute from tensor.h. Pass read-only input tensors by const reference instead of by pointer.
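For context on the third change: the math kernels in src/core/tensor/tensor_math*.h previously took their read-only operands as `const Tensor *` and now take `const Tensor &`, keeping a plain pointer only for the mutated output. Below is a minimal standalone sketch of that convention, using a hypothetical stand-in struct rather than the real singa::Tensor:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for singa::Tensor, just enough to illustrate
// the convention adopted in this commit: read-only inputs by const
// reference, mutable outputs by pointer.
struct Tensor {
  std::vector<float> data;
};

// out[i] = in[i] + x. The input can no longer be null, and the call
// site (Add(a, 2.f, &b)) makes the mutated argument obvious.
void Add(const Tensor &in, float x, Tensor *out) {
  out->data.resize(in.data.size());
  for (std::size_t i = 0; i < in.data.size(); ++i)
    out->data[i] = in.data[i] + x;
}

int main() {
  Tensor a{{1.f, 2.f, 3.f}}, b;
  Add(a, 2.f, &b);  // b.data == {3.f, 4.f, 5.f}
}
```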
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c52e2aa3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c52e2aa3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c52e2aa3 Branch: refs/heads/master Commit: c52e2aa3b5272750960ce6d3ae9f14bad1cee397 Parents: a44d2e7 Author: Vaan Ng <[email protected]> Authored: Sun May 13 00:24:40 2018 +0800 Committer: Vaan Ng <[email protected]> Committed: Sun May 13 00:24:40 2018 +0800 ---------------------------------------------------------------------- include/singa/core/tensor.h | 152 +---- src/core/tensor/tensor.cc | 60 +- src/core/tensor/tensor_math.h | 124 ++-- src/core/tensor/tensor_math_cpp.h | 1012 +++++++++---------------------- src/core/tensor/tensor_math_cuda.h | 499 ++++++++------- 5 files changed, 647 insertions(+), 1200 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c52e2aa3/include/singa/core/tensor.h ---------------------------------------------------------------------- diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h index b94a982..e25aafd 100644 --- a/include/singa/core/tensor.h +++ b/include/singa/core/tensor.h @@ -22,7 +22,6 @@ #include <vector> #include <tuple> #include <memory> -#include <algorithm> #include "singa/core/common.h" #include "singa/core/device.h" @@ -31,7 +30,6 @@ using std::vector; using std::tuple; -using std::reverse; namespace singa { typedef vector<size_t> Shape; @@ -104,43 +102,6 @@ class Tensor { return shape_.at(idx); } - /* - cudnn requires tensor dimensions to fulfill 1 requirement: - 1.) Dimensions to be set to a minimum of 4 for 4d and lower dimensional tensors - if input tensor is 5d, cudnn will take a 5d tensor as input. Beyond 5d, certain operations are not supported. - (cudnnOp supports up to 5d, cudnnReduce supports up to 8d) - - for e.g. Tensor A has shape {3,3}, cudnn requires shape of {1,1,3,3} to be the input - Tensor B has shape (2,3,4), cudnn requires shape of {1,2,3,4} to be the input - */ - vector<int> generate_shape_cuda() const { - vector<int> shape_arr; - if(shape_.size() <= 4){ - for (size_t n=0; n<4-shape_.size(); ++n) { - shape_arr.push_back(1); - } - for (size_t n=0; n<shape_.size(); ++n) { - shape_arr.push_back(shape_.at(n)); - } - return shape_arr; - } else if(shape_.size() == 5){ - for (size_t n=0; n<shape_.size(); ++n) { - shape_arr.push_back(shape_.at(n)); - } - return shape_arr; - } else { - LOG(FATAL) << "Dimensions (shape) beyond 5 are currently not supported" ; - } - } - - int generate_dim_cuda() const { - if(shape_.size() <= 4){return 4;} - else if(shape_.size() == 5){return 5;} - else{ - LOG(FATAL) << "Dimensions (shape) beyond 5 are currently not supported" ; - } - } - size_t nDim() const { return shape_.size(); } bool empty() const { return nDim() == 0; } @@ -150,40 +111,6 @@ class Tensor { const vector<int>& strides() const { return strides_; } - /* - cudnn requires stride dimensions to conform to the format of the shape input as well - 1.) Stride dimensions to be set to a minimum of 4 for 4d and lower dimensional tensors - If input tensor is 5d, cudnn will take a 5d tensor as input. Beyond 5d, certain operations are not supported. - (cudnnOp supports up to 5d, cudnnReduce supports up to 8d) - - for e.g. 
Tensor A has shape {3,3}, stride {3,1}, cudnn requires shape {1,1,3,3} and stride {9, 9, 3, 1} or {9, 9, 1, 3} to be the inputs - */ - vector<int> generate_strides_cuda() const { - vector<int> strides_arr; - int product = 1; - for (size_t n=0; n<(shape_.size()); ++n) { - product *= shape_[n]; - } - if(shape_.size() <= 4){ - for (size_t n=0; n<4-shape_.size(); ++n) { - strides_arr.push_back(product); - } - for (size_t n=0; n<strides_.size(); ++n) { - strides_arr.push_back(strides_[n]); - } - return strides_arr; - } else if(shape_.size() == 5){ - for (size_t n=0; n<strides_.size(); ++n) { - strides_arr.push_back(strides_[n]); - } - return strides_arr; - } else { - LOG(FATAL) << "Dimensions (strides) beyond 5 are currently not supported" ; - } - } - - const vector<int>& shape_multipliers() const { return shape_multipliers_; } - /// return true if the content of the tensor is initialized bool initailized() const { return block_ != nullptr && block_->initialized(); @@ -292,7 +219,7 @@ class Tensor { float L2() const; //generate strides automatically if stride field is not passed -void Generate_Strides(){ +void generate_strides(){ if(shape_.size()==0){ strides_ = {1}; return void(); @@ -306,84 +233,11 @@ void Generate_Strides(){ } }; -void Set_Strides(const vector<int> new_strides){ +void set_strides(const vector<int> new_strides){ strides_ = new_strides; } -//generate shape multipliers -//for e.g. tensor of shape (3,3), stride (1,3) will have shape multipliers of (3,1) -//for e.g. tensor of shape (3,3), stride (3,1) will also have shape multipliers of (3,1) -//this means that the 3rd, 6th, and 9th index of the array will always be the starting element of their respective rows -//so we need to need use the inner stride when jumping from 1st->2nd element, and outer stride when jumping from 2nd->3rd -vector<int> Generate_Shape_Multipliers(Shape y_shape) const { - if(y_shape.size()==0){ - return {1}; - } - reverse(y_shape.begin(), y_shape.end()); - vector<int> shape_multipliers = {}; - int cumulative_product = 1; - - shape_multipliers.push_back(1); - for (size_t n=0; n<(y_shape.size()-1); ++n) { - cumulative_product = cumulative_product*y_shape[n]; - shape_multipliers.push_back(cumulative_product); - } - reverse(shape_multipliers.begin(), shape_multipliers.end()); - return shape_multipliers; -}; - -// ****************************************************************************************** -// Some traversal operations (works on const declarations without modifying tensor variables) -// ****************************************************************************************** - -//generate a traversal_info vector based on the tensor's shape for the traverse_next function to work -vector<int> generate_traversal_info() const { - vector<int> traversal_info = {}; - for(size_t n=0; n<(shape_.size()+2); ++n) { - traversal_info.push_back(0); - } - return traversal_info; -}; - -//this function checks whether the next index falls on a special multiplier of the outer shape -//so the algorithm knows when to jump over/back to a starting element of the outer shape -//for e.g. 
in [[1,4,7], [2,5,8], [3,6,9]], elements 1,2,3 are the starting elements of their respective rows -//this additional check only has 1 loop for 2d matrix -//but runtime performance might degrade to O(nlog(n)) for higher dimensional tensors -int determine_order(int counter) const { - for (size_t n=0; n<(shape_multipliers_.size()-1); ++n) { - if((counter%shape_multipliers_[n])==0){ - return ((shape_multipliers_.size()) - 1 - n); - } - } - return 0; -}; - -//this function updates the base indexes with the current index after every single traversal step, can be generalized beyond 2d cases -void update_base_index(std::vector<int>& traversal_info) const { - for (int n=0; n<(traversal_info[shape_.size()+1]+1); ++n) { - traversal_info[n] = traversal_info[shape_.size()]; - } -}; - -//function to traverse a const strided tensor object -//it requires an additional vector, traversal_info {0,0,0,0 ...}, comprising (shape_.size()+2) elements of 0 -//for e.g. 2d matrix: -//index 0 and 1 store the base row and column index respectively -//index 2 stores the current index of the traversal -//index 3 stores the order of the traversal for e.g. if the order is 0, it means the next element can be navigated to using the innermost stride -void traverse_next(std::vector<int>& traversal_info, int counter) const { - update_base_index(traversal_info); - traversal_info[shape_.size()+1] = determine_order(counter); - traversal_info[shape_.size()] = traversal_info[traversal_info[shape_.size()+1]]+strides_[strides_.size()-traversal_info[shape_.size()+1]-1]; -}; - -// ****************************************************************************************** -// traversal operations end -// ****************************************************************************************** - protected: - //bool transpose_ = false; DataType data_type_ = kFloat32; std::shared_ptr<Device> device_ = nullptr; /// Note: block_ is allocated in lazy manner to avoid frequent malloc/free. 
@@ -391,8 +245,6 @@ void traverse_next(std::vector<int>& traversal_info, int counter) const { Block *block_ = nullptr; Shape shape_ = {}; vector<int> strides_ = {}; - vector<int> shape_multipliers_ = {}; - }; //end of tensor class typedef Shape::iterator ShapeIter; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c52e2aa3/src/core/tensor/tensor.cc ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc index 9067242..a4efd64 100644 --- a/src/core/tensor/tensor.cc +++ b/src/core/tensor/tensor.cc @@ -34,7 +34,6 @@ Tensor::~Tensor() { Tensor::Tensor() { device_ = defaultDevice; strides_ = {1}; - shape_multipliers_ = {1}; } //non-strided constructors @@ -43,16 +42,14 @@ Tensor::Tensor(const Shape &shape, DataType dtype) size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } Tensor::Tensor(Shape &&shape, DataType dtype) : data_type_(dtype), device_(defaultDevice), shape_(shape) { size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } //non-strided constructors with device @@ -62,16 +59,14 @@ Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device, size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } Tensor::Tensor(Shape &&shape, std::shared_ptr<Device> device, DataType dtype) : data_type_(dtype), device_(device), shape_(shape) { size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } @@ -81,8 +76,7 @@ Tensor::Tensor(const Tensor &in) device_(in.device_), block_(in.block()), shape_(in.shape_), - strides_(in.strides_), - shape_multipliers_(in.shape_multipliers_) { + strides_(in.strides_) { if (block_ != nullptr) block_->IncRefCount(); } @@ -95,7 +89,6 @@ Tensor::Tensor(const Tensor &in, Shape &new_shape, vector<int> &new_strides) block_(in.block()), shape_(new_shape), strides_(new_strides) { - shape_multipliers_ = Generate_Shape_Multipliers(shape_); if (block_ != nullptr) block_->IncRefCount(); } @@ -105,8 +98,7 @@ Tensor::Tensor(Tensor &&in) data_type_(in.data_type_), device_(in.device_), shape_(std::move(in.shape_)), - strides_(in.strides_), - shape_multipliers_(in.shape_multipliers_) { + strides_(in.strides_) { block_ = in.block_; in.block_ = nullptr; } @@ -129,7 +121,6 @@ void Tensor::ResetLike(const Tensor &in) { } shape_ = in.shape_; strides_ = in.strides_; - shape_multipliers_ = in.shape_multipliers_; } //if tensor is not transposed yet i.e strides == 1, then we simply change the shape and generate new default strides @@ -146,8 +137,7 @@ void Tensor::Reshape(const Shape &shape) { LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." ; } shape_ = shape; - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } void Tensor::Reshape(Shape &&shape) { @@ -162,8 +152,7 @@ void Tensor::Reshape(Shape &&shape) { LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." 
; } shape_ = std::move(shape); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } void Tensor::AsType(const DataType type) { @@ -350,7 +339,6 @@ Tensor Tensor::T() const { t.strides_.clear(); t.strides_.push_back(strides_[1]); t.strides_.push_back(strides_[0]); - t.shape_multipliers_ = Generate_Shape_Multipliers(t.shape_); t.block_ = block_; block_->IncRefCount(); return t; @@ -359,7 +347,7 @@ Tensor Tensor::T() const { //normal transpose without axes Tensor Tensor::Transpose() const { // if(shape_.size() != strides_.size()) - // Generate_Strides(); + // generate_strides(); Tensor t; t.device_ = device_; @@ -369,7 +357,6 @@ Tensor Tensor::Transpose() const { t.shape_.push_back(shape_[shape_.size()-n-1]); t.strides_.push_back(strides_[shape_.size()-n-1]); } - t.shape_multipliers_ = Generate_Shape_Multipliers(t.shape_); t.block_ = block_; block_->IncRefCount(); return t; @@ -382,7 +369,7 @@ Tensor Tensor::Transpose(Shape axes) const { // return void(); // } // if(shape_.size() != strides_.size()) - // Generate_Strides(); + // generate_strides(); Tensor t; t.device_ = device_; @@ -392,7 +379,6 @@ Tensor Tensor::Transpose(Shape axes) const { t.shape_.push_back(shape_[axes[n]]); t.strides_.push_back(strides_[axes[n]]); } - t.shape_multipliers_ = Generate_Shape_Multipliers(t.shape_); t.block_ = block_; block_->IncRefCount(); return t; @@ -564,7 +550,7 @@ float Tensor::L1() const { TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, { device_->Exec([&nrm, this](Context *ctx) { DType ret = DType(0); - Asum<DType, Lang>(this, &ret, ctx); + Asum<DType, Lang>(*this, &ret, ctx); nrm = TypeCast<DType, float>(ret); }, {this->block()}, {}); }); @@ -577,7 +563,7 @@ float Tensor::L2() const { TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, { device_->Exec([&nrm, this](Context *ctx) { DType ret = DType(0); - Nrm2<DType, Lang>(this, &ret, ctx); + Nrm2<DType, Lang>(*this, &ret, ctx); nrm = TypeCast<DType, float>(ret); }, {this->block()}, {}); }); @@ -603,7 +589,7 @@ template void Tensor::SetValue<int>(const int x); do { \ TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \ ret->device()->Exec([t, ret](Context * ctx) { \ - fn<DType, Lang>(&t, ret, ctx); \ + fn<DType, Lang>(t, ret, ctx); \ }, {t.block()}, {ret->block()}); \ }); \ } while (0) @@ -632,7 +618,7 @@ GenUnaryTensorFn(Tanh); TYPE_LANG_SWITCH(lhs.data_type(), DType, lhs.device()->lang(), Lang, { \ CHECK_EQ(sizeof(DType), SizeOf(rhs.data_type())); \ ret->device()->Exec([lhs, rhs, ret](Context * ctx) { \ - fn<DType, Lang>(&lhs, &rhs, ret, \ + fn<DType, Lang>(lhs, rhs, ret, \ ctx); \ }, {lhs.block(), rhs.block()}, {ret->block()}); \ }); \ @@ -663,7 +649,7 @@ GenBinaryTensorFn(operator>=, GE); static_assert(std::is_same<SType, DType>::value, \ "The Scalar type must match the Tensor data type"); \ ret->device()->Exec([t, x, ret](Context * ctx) { \ - fn<DType, Lang>(&t, x, ret, ctx); \ + fn<DType, Lang>(t, x, ret, ctx); \ }, {t.block()}, {ret->block()}); \ }); \ } while (0) @@ -706,7 +692,7 @@ void Div(const SType alpha, const Tensor &in, Tensor *out) { TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { // TODO(wangwei) type cast SType to DType; in.device()->Exec([alpha, in, out](Context *ctx) { - Div<DType, Lang>(alpha, &in, out, ctx); + Div<DType, Lang>(alpha, in, out, ctx); }, {in.block()}, {out->block()}); }); } @@ -743,7 +729,7 @@ float Sum<float>(const Tensor &in) { TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { 
one.device()->Exec([in, one, &s](Context *ctx) { DType ret = DType(0); - Dot<DType, Lang>(&in, &one, &ret, ctx); + Dot<DType, Lang>(in, one, &ret, ctx); s = ret; }, {in.block(), one.block()}, {}); }); @@ -776,7 +762,7 @@ Tensor RowMax(const Tensor &in) { //size_t nrow = 1; //if (in.nDim() > 1) nrow = in.shape(0); //size_t ncol = in.Size() / nrow; - RowMax<DType, Lang>(&in, &ret, ctx); + RowMax<DType, Lang>(in, &ret, ctx); }, {in.block()}, {ret.block()}); }); return ret; @@ -1012,7 +998,7 @@ void MultColumn(const Tensor &v, Tensor *M) { CheckDataTypeAndLang(*M, v); TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, { v.device()->Exec([M, v](Context *ctx) { - DGMM<DType, Lang>(false, M, &v, + DGMM<DType, Lang>(false, *M, v, M, ctx); }, {M->block(), v.block()}, {M->block()}); }); @@ -1027,7 +1013,7 @@ void MultRow(const Tensor &v, Tensor *M) { CheckDataTypeAndLang(*M, v); TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, { v.device()->Exec([M, v](Context *ctx) { - DGMM<DType, Lang>(true, M, &v, + DGMM<DType, Lang>(true, *M, v, M, ctx); }, {M->block(), v.block()}, {M->block()}); }); @@ -1113,7 +1099,7 @@ void Axpy(const SType alpha, const Tensor &in, Tensor *out) { TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { auto a = TypeCast<SType, DType>(alpha); out->device()->Exec([a, in, out](Context *ctx) { - Axpy<DType, Lang>(a, &in, out, ctx); + Axpy<DType, Lang>(a, in, out, ctx); }, {in.block(), out->block()}, {out->block()}); }); } @@ -1143,7 +1129,7 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta, auto a = TypeCast<SType, DType>(alpha); auto b = TypeCast<SType, DType>(beta); C->device()->Exec([a, A, b, B, C](Context *ctx) { - GEMV<DType, Lang>(a, &A, &B, b, C, ctx); + GEMV<DType, Lang>(a, A, B, b, C, ctx); }, {A.block(), B.block()}, {C->block()}); }); } else { @@ -1152,7 +1138,7 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta, auto a = TypeCast<SType, DType>(alpha); auto b = TypeCast<SType, DType>(beta); C->device()->Exec([a, A, b, B, C](Context *ctx) { - GEMM<DType, Lang>(a, &A, &B, b, C, + GEMM<DType, Lang>(a, A, B, b, C, ctx); }, {A.block(), B.block()}, {C->block()}); }); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c52e2aa3/src/core/tensor/tensor_math.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h index c403f30..c7fdfe5 100644 --- a/src/core/tensor/tensor_math.h +++ b/src/core/tensor/tensor_math.h @@ -40,7 +40,7 @@ namespace singa { /// 4. Function argument names, use 'num' for total number of elements in /// elementwise operations; use 'in1' 'in2' for in Tensors; use 'out' for /// output Tensor or value. With exceptions for some functions, e.g., -/// Scale(const float alpha, const Tensor* in, Tensor* out); +/// Scale(const float alpha, const Tensor &in, Tensor* out); /// For such cases, use x, v, alpha, etc for scalar types. /// For blas functions, follow the blas style for argument names. /// Use 'M' and 'v' for matrix and vector tensors in functions involving both @@ -50,37 +50,6 @@ namespace singa { /// 7. Use size_t for the number of elements, rows or columns. /// 8. Use the same name for the Tensor and Tensor level math functions. 
-// template <typename DType> -// void TraverseUnary(const Tensor* in, Tensor* out, std::function<DType(DType)> func){} - -// template <typename DType> -// void TraverseBinary(const Tensor* in1, const Tensor* in2, Tensor* out, std::function<DType(DType, DType)> func){} - -template <typename DType> -void TraverseUnary(const Tensor* in, Tensor* out, std::function<DType(DType)> func){ - DType *outPtr = static_cast<DType *>(out->block()->mutable_data()); - const DType *inPtr = static_cast<const DType *>(in->block()->data()); - vector<int> traversal_info = in->generate_traversal_info(); - for (size_t i = 0; i < in->Size(); i++) { - outPtr[i] = func(inPtr[traversal_info[in->shape().size()]]); - in->traverse_next(traversal_info, i+1); - } -} - -template <typename DType> -void TraverseBinary(const Tensor* in1, const Tensor* in2, Tensor* out, std::function<DType(DType, DType)> func){ - DType *outPtr = static_cast<DType *>(out->block()->mutable_data()); - const DType *in1Ptr = static_cast<const DType *>(in1->block()->data()); - const DType *in2Ptr = static_cast<const DType *>(in2->block()->data()); - vector<int> traversal_info_in1 = in1->generate_traversal_info(); - vector<int> traversal_info_in2 = in2->generate_traversal_info(); - for (size_t i = 0; i < in1->Size(); i++) { - outPtr[i] = func(in1Ptr[traversal_info_in1[in1->shape().size()]], in2Ptr[traversal_info_in2[in2->shape().size()]]); - in1->traverse_next(traversal_info_in1, i+1); - in2->traverse_next(traversal_info_in2, i+1); - } -} - // ************************************** // Element-wise functions @@ -88,41 +57,41 @@ void TraverseBinary(const Tensor* in1, const Tensor* in2, Tensor* out, std::func /// out[i] = |in[i]| template <typename DType, typename Lang> -void Abs(const Tensor *in, Tensor *out, Context *ctx) { +void Abs(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Abs Not Implemented"; } /// out[i] = in[i] + x template <typename DType, typename Lang> -void Add(const Tensor *in, const DType x, Tensor *out, +void Add(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "Add Not Implemented"; } /// out[i] = in1[i] + in2[i] template <typename DType, typename Lang> -void Add(const Tensor *in1, const Tensor *in2, Tensor *out, +void Add(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Add-Pair Not Implemented"; } /// Clamp every element into [low, high] /// if in[i]>high, then out[i]=high; if in[i]<low, then out[i]=low. 
template <typename DType, typename Lang> -void Clamp(const DType low, const DType high, const Tensor *in, +void Clamp(const DType low, const DType high, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Clamp Not Implemented"; } /// out[i] = x / in[i] template <typename DType, typename Lang> -void Div(const DType x, const Tensor *in, Tensor *out, +void Div(const DType x, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Div Not Implemented"; } /// out[i] = in[i] / x template <typename DType, typename Lang> -void Div(const Tensor *in, const DType x, Tensor *out, +void Div(const Tensor &in, const DType x, Tensor *out, Context *ctx) { CHECK_NE(x, 0.f); EltwiseMult<DType, Lang>(in, DType(1) / x, out, ctx); @@ -130,101 +99,101 @@ void Div(const Tensor *in, const DType x, Tensor *out, /// out[i] = in1[i] / in2[i] template <typename DType, typename Lang> -void Div(const Tensor *in1, const Tensor *in2, Tensor *out, +void Div(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Div-Pair Not Implemented"; } /// out[i] = in[i] * x template <typename DType, typename Lang> -void EltwiseMult(const Tensor *in, const DType x, Tensor *out, +void EltwiseMult(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "EltwiseMult Not Implemented"; } /// out[i] = in1[i] * in2[i] template <typename DType, typename Lang> -void EltwiseMult(const Tensor *in1, const Tensor *in2, Tensor *out, +void EltwiseMult(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "EltwiseMult-Pair Not Implemented"; } /// Base is e, Neper number. out[i]=exp(in[i]) template <typename DType, typename Lang> -void Exp(const Tensor *in, Tensor *out, Context *ctx) { +void Exp(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Exp Not Implemented"; } /// out[i]=(in[i]<=x)?1.f:0.f template <typename DType, typename Lang> -void LE(const Tensor *in, const DType x, Tensor *out, +void LE(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "LE Not Implemented"; } /// out[i]=(in1[i]<=in2[i])?1.f:0.f template <typename DType, typename Lang> -void LE(const Tensor *in1, const Tensor *in2, Tensor *out, +void LE(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor LE Not Implemented"; } /// Natual logarithm, the base is e, Neper number out[i]=log(in[i]). 
template <typename DType, typename Lang> -void Log(const Tensor *in, Tensor *out, Context *ctx) { +void Log(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Log Not Implemented"; } /// out[i]=(in[i]<x)?1.f:0.f template <typename DType, typename Lang> -void LT(const Tensor *in, const DType x, Tensor *out, +void LT(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "LT Not Implemented"; } /// out[i]=(in1[i]<in2[i])?1.f:0.f template <typename DType, typename Lang> -void LT(const Tensor *in1, const Tensor *in2, Tensor *out, +void LT(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor LT Not Implemented"; } /// out[i]=(in[i]>=x)?1.f:0.f template <typename DType, typename Lang> -void GE(const Tensor *in, const DType x, Tensor *out, +void GE(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "GE Not Implemented"; } /// out[i]=(in1[i]>=in2[i])?1.f:0.f template <typename DType, typename Lang> -void GE(const Tensor *in1, const Tensor *in2, Tensor *out, +void GE(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor GE Not Implemented"; } /// out[i]=(in[i]>x)?1.f:0.f template <typename DType, typename Lang> -void GT(const Tensor *in, const DType x, Tensor *out, +void GT(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "GT Not Implemented"; } /// out[i]=(in[i]>in2[i])?1.f:0.f template <typename DType, typename Lang> -void GT(const Tensor *in, const Tensor *in2, Tensor *out, +void GT(const Tensor &in, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor GT Not Implemented"; } /// out[i] = pow(in[i], x) template <typename DType, typename Lang> -void Pow(const Tensor *in, const DType x, Tensor *out, +void Pow(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "Pow Not Implemented"; } /// out[i]=pow(in1[i], in2[i]) template <typename DType, typename Lang> -void Pow(const Tensor *in1, const Tensor *in2, Tensor *out, +void Pow(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Pow-Pair Not Implemented"; } /// out[i]=max(0, in[i]) template <typename DType, typename Lang> -void ReLU(const Tensor *in, Tensor *out, Context *ctx) { +void ReLU(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "ReLU Not Implemented"; } @@ -235,50 +204,50 @@ void Set(const DType x, Tensor *out, Context *ctx) { } /// out[i]=sigmoid(in[i]) template <typename DType, typename Lang> -void Sigmoid(const Tensor *in, Tensor *out, Context *ctx) { +void Sigmoid(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Sigmoid Not Implemented"; } /// out[i] = sign(in[i]) template <typename DType, typename Lang> -void Sign(const Tensor *in, Tensor *out, Context *ctx) { +void Sign(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Sign Not Implemented"; } /// out[i]=sqrt(in[i]) template <typename DType, typename Lang> -void Sqrt(const Tensor *in, Tensor *out, Context *ctx) { +void Sqrt(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Sqrt Not Implemented"; } /// out[i]=square(in[i]) template <typename DType, typename Lang> -void Square(const Tensor *in, Tensor *out, Context *ctx) { +void Square(const Tensor &in, Tensor *out, Context *ctx) { EltwiseMult<DType, Lang>(in, in, out, ctx); } /// out[i] = in[i] - x template <typename DType, typename Lang> -void Sub(const Tensor *in, const DType x, Tensor *out, +void Sub(const Tensor &in, const DType x, Tensor 
*out, Context *ctx) { Add<DType, Lang>(in, -x, out, ctx); } /// out[i] = in1[i] - in2[i] template <typename DType, typename Lang> -void Sub(const Tensor *in1, const Tensor *in2, Tensor *out, +void Sub(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Sub-Pair Not Implemented"; } /// sum all elements of in into out template <typename DType, typename Lang> -void Sum(const Tensor *in, DType *out, Context *ctx) { +void Sum(const Tensor &in, DType *out, Context *ctx) { LOG(FATAL) << "Sum Not Implemented"; } /// out[i]=tanh(in[i]) template <typename DType, typename Lang> -void Tanh(const Tensor *in, Tensor *out, Context *ctx) { +void Tanh(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Tanh Not Implemented"; } @@ -313,31 +282,31 @@ void Uniform(const float low, const float high, Tensor *out, /// outurn the index of the element with the max value. template <typename DType, typename Lang> -void Amax(const Tensor *in, size_t *out, Context *ctx) { +void Amax(const Tensor &in, size_t *out, Context *ctx) { LOG(FATAL) << "Amax Not Implemented"; } /// outurn the index of the element with the min value. template <typename DType, typename Lang> -void Amin(const Tensor *in, size_t *out, Context *ctx) { +void Amin(const Tensor &in, size_t *out, Context *ctx) { LOG(FATAL) << "Amin Not Implemented"; } /// out = sum |x| for all x in in template <typename DType, typename Lang> -void Asum(const Tensor *in, DType *out, Context *ctx) { +void Asum(const Tensor &in, DType *out, Context *ctx) { LOG(FATAL) << "Asum Not Implemented"; } /// out = alpha * in + out template <typename DType, typename Lang> -void Axpy(const DType alpha, const Tensor *in, Tensor *out, +void Axpy(const DType alpha, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Axpy Not Implemented"; } /// out = ||in||_2^2, i.e, L2 norm. 
template <typename DType, typename Lang> -void Nrm2(const Tensor *in, float *out, Context *ctx) { +void Nrm2(const Tensor &in, float *out, Context *ctx) { LOG(FATAL) << "Nrm2 Not Implemented"; } @@ -349,7 +318,7 @@ void Scale(const DType x, Tensor *out, Context *ctx) { /// inner product of array in1 and in2 template <typename DType, typename Lang> -void Dot(const Tensor *in1, const Tensor *in2, DType *out, +void Dot(const Tensor &in1, const Tensor &in2, DType *out, Context *ctx) { LOG(FATAL) << "Dot Not Implemented"; } @@ -358,7 +327,7 @@ void Dot(const Tensor *in1, const Tensor *in2, DType *out, /// transA indicates if the internal data layout is transposed of A template <typename DType, typename Lang> void GEMV(const DType alpha, - const Tensor *A, const Tensor *v, const DType beta, Tensor *out, + const Tensor &A, const Tensor &v, const DType beta, Tensor *out, Context *ctx) { LOG(FATAL) << "GEMV Not Implemented"; } @@ -367,7 +336,7 @@ void GEMV(const DType alpha, /// if matrix_lef_side is true, do M*v; else do v*M template <typename DType, typename Lang> void DGMM(const bool side_right, - const Tensor *M, const Tensor *v, Tensor *out, Context *ctx) { + const Tensor &M, const Tensor &v, Tensor *out, Context *ctx) { LOG(FATAL) << "DGMM Not Implemented"; } @@ -375,7 +344,7 @@ void DGMM(const bool side_right, /// transA indicates if the internal data layout is transposed of A template <typename DType, typename Lang> void GEMM(const DType alpha, - const Tensor *A, const Tensor *B, const DType beta, Tensor *C, + const Tensor &A, const Tensor &B, const DType beta, Tensor *C, Context *ctx) { LOG(FATAL) << "GEMM Not Implemented"; } @@ -396,7 +365,7 @@ void SoftmaxCrossEntropyBwd(bool int_target, const size_t batchsize, } template <typename DType, typename Lang> -void RowMax(const Tensor *in, Tensor *out, Context* ctx) { +void RowMax(const Tensor &in, Tensor *out, Context* ctx) { LOG(FATAL) << "Not Implemented"; } // ************************************** @@ -405,28 +374,28 @@ void RowMax(const Tensor *in, Tensor *out, Context* ctx) { /* /// Add the vector v to every column of A as the column of out template <typename DType, typename Lang> -void AddCol(const size_t nrow, const size_t ncol, const Tensor *A, const Tensor *v, +void AddCol(const size_t nrow, const size_t ncol, const Tensor &A, const Tensor &v, Tensor *out, Context *ctx) { LOG(FATAL) << "AddCol Not Implemented"; } // TODO(wangwei) unify AddRow and AddCol. /// Add the vector v to every row of A as the row of out template <typename DType, typename Lang> -void AddRow(const size_t nrow, const size_t ncol, const Tensor *A, const Tensor *v, +void AddRow(const size_t nrow, const size_t ncol, const Tensor &A, const Tensor &v, Tensor *out, Context *ctx) { LOG(FATAL) << "AddRow Not Implemented"; } /// outer-product. /// in1 and in2 are vectors of len m and n. 
out is matrix of shape m * n template <typename DType, typename Lang> -void Outer(const size_t m, const size_t n, const Tensor *in1, const Tensor *in2, +void Outer(const size_t m, const size_t n, const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Outer Not Implemented"; } /// Sum the columns of the in matrix into a vector template <typename DType, typename Lang> -void SumColumns(const size_t nrow, const size_t ncol, const Tensor *in, Tensor *out, +void SumColumns(const size_t nrow, const size_t ncol, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "SumColumns Not Implemented"; } @@ -438,10 +407,11 @@ void Set(const DType x, Tensor *out, Context *ctx) { // TODO(wangwei) unify SumRow and SumCol. /// Sum the rows of the in matrix into a vector template <typename DType, typename Lang> -void SumRows(const size_t nrow, const size_t ncol, const Tensor *in, Tensor *out, +void SumRows(const size_t nrow, const size_t ncol, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "SumRows Not Implemented"; } */ + } // namespace singa #endif // SINGA_CORE_MATH_H_
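The cudnn shape/stride padding removed from tensor.h above (generate_shape_cuda / generate_strides_cuda) moved with this commit into the math backends, presumably tensor_math_cuda.h, whose hunks this excerpt truncates. As a reference for the logic the removed comments describe, here is a standalone sketch under that assumption (hypothetical function names, not the SINGA API): cudnn descriptors require at least 4 dimensions, so lower-dimensional tensors are left-padded with shape entries of 1 and stride entries equal to the total element count, which never affect addressing.

```cpp
#include <cstdio>
#include <vector>

// Left-pad a shape to 4 dimensions with 1s, e.g. {3,3} -> {1,1,3,3}.
std::vector<int> PadShapeForCudnn(const std::vector<int> &shape) {
  std::vector<int> out(shape.size() < 4 ? 4 - shape.size() : 0, 1);
  out.insert(out.end(), shape.begin(), shape.end());
  return out;
}

// Left-pad strides with the total element count, e.g. shape {3,3} with
// strides {3,1} -> {9,9,3,1}, matching the removed tensor.h comment.
std::vector<int> PadStridesForCudnn(const std::vector<int> &shape,
                                    const std::vector<int> &strides) {
  int product = 1;
  for (int d : shape) product *= d;
  std::vector<int> out(shape.size() < 4 ? 4 - shape.size() : 0, product);
  out.insert(out.end(), strides.begin(), strides.end());
  return out;
}

int main() {
  for (int v : PadShapeForCudnn({3, 3})) std::printf("%d ", v);    // 1 1 3 3
  std::printf("| ");
  for (int v : PadStridesForCudnn({3, 3}, {3, 1})) std::printf("%d ", v);  // 9 9 3 1
}
```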
