Streamline tensor.h by moving member functions into the corresponding cpp or cuda files. Remove the shape_multipliers_ attribute from tensor.h. Pass read-only input tensors by const reference instead of by pointer.
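For context on the third change: the math kernels in src/core/tensor/tensor_math*.h previously took their read-only operands as `const Tensor *` and now take `const Tensor &`, keeping a plain pointer only for the mutated output. Below is a minimal standalone sketch of that convention, using a hypothetical stand-in struct rather than the real singa::Tensor:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for singa::Tensor, just enough to illustrate
// the convention adopted in this commit: read-only inputs by const
// reference, mutable outputs by pointer.
struct Tensor {
  std::vector<float> data;
};

// out[i] = in[i] + x. The input can no longer be null, and the call
// site (Add(a, 2.f, &b)) makes the mutated argument obvious.
void Add(const Tensor &in, float x, Tensor *out) {
  out->data.resize(in.data.size());
  for (std::size_t i = 0; i < in.data.size(); ++i)
    out->data[i] = in.data[i] + x;
}

int main() {
  Tensor a{{1.f, 2.f, 3.f}}, b;
  Add(a, 2.f, &b);  // b.data == {3.f, 4.f, 5.f}
}
```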
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c52e2aa3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c52e2aa3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c52e2aa3 Branch: refs/heads/master Commit: c52e2aa3b5272750960ce6d3ae9f14bad1cee397 Parents: a44d2e7 Author: Vaan Ng <[email protected]> Authored: Sun May 13 00:24:40 2018 +0800 Committer: Vaan Ng <[email protected]> Committed: Sun May 13 00:24:40 2018 +0800 ---------------------------------------------------------------------- include/singa/core/tensor.h | 152 +---- src/core/tensor/tensor.cc | 60 +- src/core/tensor/tensor_math.h | 124 ++-- src/core/tensor/tensor_math_cpp.h | 1012 +++++++++---------------------- src/core/tensor/tensor_math_cuda.h | 499 ++++++++------- 5 files changed, 647 insertions(+), 1200 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c52e2aa3/include/singa/core/tensor.h ---------------------------------------------------------------------- diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h index b94a982..e25aafd 100644 --- a/include/singa/core/tensor.h +++ b/include/singa/core/tensor.h @@ -22,7 +22,6 @@ #include <vector> #include <tuple> #include <memory> -#include <algorithm> #include "singa/core/common.h" #include "singa/core/device.h" @@ -31,7 +30,6 @@ using std::vector; using std::tuple; -using std::reverse; namespace singa { typedef vector<size_t> Shape; @@ -104,43 +102,6 @@ class Tensor { return shape_.at(idx); } - /* - cudnn requires tensor dimensions to fulfill 1 requirement: - 1.) Dimensions to be set to a minimum of 4 for 4d and lower dimensional tensors - if input tensor is 5d, cudnn will take a 5d tensor as input. Beyond 5d, certain operations are not supported. - (cudnnOp supports up to 5d, cudnnReduce supports up to 8d) - - for e.g. Tensor A has shape {3,3}, cudnn requires shape of {1,1,3,3} to be the input - Tensor B has shape (2,3,4), cudnn requires shape of {1,2,3,4} to be the input - */ - vector<int> generate_shape_cuda() const { - vector<int> shape_arr; - if(shape_.size() <= 4){ - for (size_t n=0; n<4-shape_.size(); ++n) { - shape_arr.push_back(1); - } - for (size_t n=0; n<shape_.size(); ++n) { - shape_arr.push_back(shape_.at(n)); - } - return shape_arr; - } else if(shape_.size() == 5){ - for (size_t n=0; n<shape_.size(); ++n) { - shape_arr.push_back(shape_.at(n)); - } - return shape_arr; - } else { - LOG(FATAL) << "Dimensions (shape) beyond 5 are currently not supported" ; - } - } - - int generate_dim_cuda() const { - if(shape_.size() <= 4){return 4;} - else if(shape_.size() == 5){return 5;} - else{ - LOG(FATAL) << "Dimensions (shape) beyond 5 are currently not supported" ; - } - } - size_t nDim() const { return shape_.size(); } bool empty() const { return nDim() == 0; } @@ -150,40 +111,6 @@ class Tensor { const vector<int>& strides() const { return strides_; } - /* - cudnn requires stride dimensions to conform to the format of the shape input as well - 1.) Stride dimensions to be set to a minimum of 4 for 4d and lower dimensional tensors - If input tensor is 5d, cudnn will take a 5d tensor as input. Beyond 5d, certain operations are not supported. - (cudnnOp supports up to 5d, cudnnReduce supports up to 8d) - - for e.g. 
Tensor A has shape {3,3}, stride {3,1}, cudnn requires shape {1,1,3,3} and stride {9, 9, 3, 1} or {9, 9, 1, 3} to be the inputs - */ - vector<int> generate_strides_cuda() const { - vector<int> strides_arr; - int product = 1; - for (size_t n=0; n<(shape_.size()); ++n) { - product *= shape_[n]; - } - if(shape_.size() <= 4){ - for (size_t n=0; n<4-shape_.size(); ++n) { - strides_arr.push_back(product); - } - for (size_t n=0; n<strides_.size(); ++n) { - strides_arr.push_back(strides_[n]); - } - return strides_arr; - } else if(shape_.size() == 5){ - for (size_t n=0; n<strides_.size(); ++n) { - strides_arr.push_back(strides_[n]); - } - return strides_arr; - } else { - LOG(FATAL) << "Dimensions (strides) beyond 5 are currently not supported" ; - } - } - - const vector<int>& shape_multipliers() const { return shape_multipliers_; } - /// return true if the content of the tensor is initialized bool initailized() const { return block_ != nullptr && block_->initialized(); @@ -292,7 +219,7 @@ class Tensor { float L2() const; //generate strides automatically if stride field is not passed -void Generate_Strides(){ +void generate_strides(){ if(shape_.size()==0){ strides_ = {1}; return void(); @@ -306,84 +233,11 @@ void Generate_Strides(){ } }; -void Set_Strides(const vector<int> new_strides){ +void set_strides(const vector<int> new_strides){ strides_ = new_strides; } -//generate shape multipliers -//for e.g. tensor of shape (3,3), stride (1,3) will have shape multipliers of (3,1) -//for e.g. tensor of shape (3,3), stride (3,1) will also have shape multipliers of (3,1) -//this means that the 3rd, 6th, and 9th index of the array will always be the starting element of their respective rows -//so we need to need use the inner stride when jumping from 1st->2nd element, and outer stride when jumping from 2nd->3rd -vector<int> Generate_Shape_Multipliers(Shape y_shape) const { - if(y_shape.size()==0){ - return {1}; - } - reverse(y_shape.begin(), y_shape.end()); - vector<int> shape_multipliers = {}; - int cumulative_product = 1; - - shape_multipliers.push_back(1); - for (size_t n=0; n<(y_shape.size()-1); ++n) { - cumulative_product = cumulative_product*y_shape[n]; - shape_multipliers.push_back(cumulative_product); - } - reverse(shape_multipliers.begin(), shape_multipliers.end()); - return shape_multipliers; -}; - -// ****************************************************************************************** -// Some traversal operations (works on const declarations without modifying tensor variables) -// ****************************************************************************************** - -//generate a traversal_info vector based on the tensor's shape for the traverse_next function to work -vector<int> generate_traversal_info() const { - vector<int> traversal_info = {}; - for(size_t n=0; n<(shape_.size()+2); ++n) { - traversal_info.push_back(0); - } - return traversal_info; -}; - -//this function checks whether the next index falls on a special multiplier of the outer shape -//so the algorithm knows when to jump over/back to a starting element of the outer shape -//for e.g. 
in [[1,4,7], [2,5,8], [3,6,9]], elements 1,2,3 are the starting elements of their respective rows -//this additional check only has 1 loop for 2d matrix -//but runtime performance might degrade to O(nlog(n)) for higher dimensional tensors -int determine_order(int counter) const { - for (size_t n=0; n<(shape_multipliers_.size()-1); ++n) { - if((counter%shape_multipliers_[n])==0){ - return ((shape_multipliers_.size()) - 1 - n); - } - } - return 0; -}; - -//this function updates the base indexes with the current index after every single traversal step, can be generalized beyond 2d cases -void update_base_index(std::vector<int>& traversal_info) const { - for (int n=0; n<(traversal_info[shape_.size()+1]+1); ++n) { - traversal_info[n] = traversal_info[shape_.size()]; - } -}; - -//function to traverse a const strided tensor object -//it requires an additional vector, traversal_info {0,0,0,0 ...}, comprising (shape_.size()+2) elements of 0 -//for e.g. 2d matrix: -//index 0 and 1 store the base row and column index respectively -//index 2 stores the current index of the traversal -//index 3 stores the order of the traversal for e.g. if the order is 0, it means the next element can be navigated to using the innermost stride -void traverse_next(std::vector<int>& traversal_info, int counter) const { - update_base_index(traversal_info); - traversal_info[shape_.size()+1] = determine_order(counter); - traversal_info[shape_.size()] = traversal_info[traversal_info[shape_.size()+1]]+strides_[strides_.size()-traversal_info[shape_.size()+1]-1]; -}; - -// ****************************************************************************************** -// traversal operations end -// ****************************************************************************************** - protected: - //bool transpose_ = false; DataType data_type_ = kFloat32; std::shared_ptr<Device> device_ = nullptr; /// Note: block_ is allocated in lazy manner to avoid frequent malloc/free. 
@@ -391,8 +245,6 @@ void traverse_next(std::vector<int>& traversal_info, int counter) const { Block *block_ = nullptr; Shape shape_ = {}; vector<int> strides_ = {}; - vector<int> shape_multipliers_ = {}; - }; //end of tensor class typedef Shape::iterator ShapeIter; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c52e2aa3/src/core/tensor/tensor.cc ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc index 9067242..a4efd64 100644 --- a/src/core/tensor/tensor.cc +++ b/src/core/tensor/tensor.cc @@ -34,7 +34,6 @@ Tensor::~Tensor() { Tensor::Tensor() { device_ = defaultDevice; strides_ = {1}; - shape_multipliers_ = {1}; } //non-strided constructors @@ -43,16 +42,14 @@ Tensor::Tensor(const Shape &shape, DataType dtype) size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } Tensor::Tensor(Shape &&shape, DataType dtype) : data_type_(dtype), device_(defaultDevice), shape_(shape) { size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } //non-strided constructors with device @@ -62,16 +59,14 @@ Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device, size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } Tensor::Tensor(Shape &&shape, std::shared_ptr<Device> device, DataType dtype) : data_type_(dtype), device_(device), shape_(shape) { size_t size = Product(shape_) * SizeOf(data_type_); if (size) block_ = device_->NewBlock((int)size); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } @@ -81,8 +76,7 @@ Tensor::Tensor(const Tensor &in) device_(in.device_), block_(in.block()), shape_(in.shape_), - strides_(in.strides_), - shape_multipliers_(in.shape_multipliers_) { + strides_(in.strides_) { if (block_ != nullptr) block_->IncRefCount(); } @@ -95,7 +89,6 @@ Tensor::Tensor(const Tensor &in, Shape &new_shape, vector<int> &new_strides) block_(in.block()), shape_(new_shape), strides_(new_strides) { - shape_multipliers_ = Generate_Shape_Multipliers(shape_); if (block_ != nullptr) block_->IncRefCount(); } @@ -105,8 +98,7 @@ Tensor::Tensor(Tensor &&in) data_type_(in.data_type_), device_(in.device_), shape_(std::move(in.shape_)), - strides_(in.strides_), - shape_multipliers_(in.shape_multipliers_) { + strides_(in.strides_) { block_ = in.block_; in.block_ = nullptr; } @@ -129,7 +121,6 @@ void Tensor::ResetLike(const Tensor &in) { } shape_ = in.shape_; strides_ = in.strides_; - shape_multipliers_ = in.shape_multipliers_; } //if tensor is not transposed yet i.e strides == 1, then we simply change the shape and generate new default strides @@ -146,8 +137,7 @@ void Tensor::Reshape(const Shape &shape) { LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." ; } shape_ = shape; - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } void Tensor::Reshape(Shape &&shape) { @@ -162,8 +152,7 @@ void Tensor::Reshape(Shape &&shape) { LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." 
; } shape_ = std::move(shape); - Generate_Strides(); - shape_multipliers_ = Generate_Shape_Multipliers(shape_); + generate_strides(); } void Tensor::AsType(const DataType type) { @@ -350,7 +339,6 @@ Tensor Tensor::T() const { t.strides_.clear(); t.strides_.push_back(strides_[1]); t.strides_.push_back(strides_[0]); - t.shape_multipliers_ = Generate_Shape_Multipliers(t.shape_); t.block_ = block_; block_->IncRefCount(); return t; @@ -359,7 +347,7 @@ Tensor Tensor::T() const { //normal transpose without axes Tensor Tensor::Transpose() const { // if(shape_.size() != strides_.size()) - // Generate_Strides(); + // generate_strides(); Tensor t; t.device_ = device_; @@ -369,7 +357,6 @@ Tensor Tensor::Transpose() const { t.shape_.push_back(shape_[shape_.size()-n-1]); t.strides_.push_back(strides_[shape_.size()-n-1]); } - t.shape_multipliers_ = Generate_Shape_Multipliers(t.shape_); t.block_ = block_; block_->IncRefCount(); return t; @@ -382,7 +369,7 @@ Tensor Tensor::Transpose(Shape axes) const { // return void(); // } // if(shape_.size() != strides_.size()) - // Generate_Strides(); + // generate_strides(); Tensor t; t.device_ = device_; @@ -392,7 +379,6 @@ Tensor Tensor::Transpose(Shape axes) const { t.shape_.push_back(shape_[axes[n]]); t.strides_.push_back(strides_[axes[n]]); } - t.shape_multipliers_ = Generate_Shape_Multipliers(t.shape_); t.block_ = block_; block_->IncRefCount(); return t; @@ -564,7 +550,7 @@ float Tensor::L1() const { TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, { device_->Exec([&nrm, this](Context *ctx) { DType ret = DType(0); - Asum<DType, Lang>(this, &ret, ctx); + Asum<DType, Lang>(*this, &ret, ctx); nrm = TypeCast<DType, float>(ret); }, {this->block()}, {}); }); @@ -577,7 +563,7 @@ float Tensor::L2() const { TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, { device_->Exec([&nrm, this](Context *ctx) { DType ret = DType(0); - Nrm2<DType, Lang>(this, &ret, ctx); + Nrm2<DType, Lang>(*this, &ret, ctx); nrm = TypeCast<DType, float>(ret); }, {this->block()}, {}); }); @@ -603,7 +589,7 @@ template void Tensor::SetValue<int>(const int x); do { \ TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \ ret->device()->Exec([t, ret](Context * ctx) { \ - fn<DType, Lang>(&t, ret, ctx); \ + fn<DType, Lang>(t, ret, ctx); \ }, {t.block()}, {ret->block()}); \ }); \ } while (0) @@ -632,7 +618,7 @@ GenUnaryTensorFn(Tanh); TYPE_LANG_SWITCH(lhs.data_type(), DType, lhs.device()->lang(), Lang, { \ CHECK_EQ(sizeof(DType), SizeOf(rhs.data_type())); \ ret->device()->Exec([lhs, rhs, ret](Context * ctx) { \ - fn<DType, Lang>(&lhs, &rhs, ret, \ + fn<DType, Lang>(lhs, rhs, ret, \ ctx); \ }, {lhs.block(), rhs.block()}, {ret->block()}); \ }); \ @@ -663,7 +649,7 @@ GenBinaryTensorFn(operator>=, GE); static_assert(std::is_same<SType, DType>::value, \ "The Scalar type must match the Tensor data type"); \ ret->device()->Exec([t, x, ret](Context * ctx) { \ - fn<DType, Lang>(&t, x, ret, ctx); \ + fn<DType, Lang>(t, x, ret, ctx); \ }, {t.block()}, {ret->block()}); \ }); \ } while (0) @@ -706,7 +692,7 @@ void Div(const SType alpha, const Tensor &in, Tensor *out) { TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { // TODO(wangwei) type cast SType to DType; in.device()->Exec([alpha, in, out](Context *ctx) { - Div<DType, Lang>(alpha, &in, out, ctx); + Div<DType, Lang>(alpha, in, out, ctx); }, {in.block()}, {out->block()}); }); } @@ -743,7 +729,7 @@ float Sum<float>(const Tensor &in) { TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { 
one.device()->Exec([in, one, &s](Context *ctx) { DType ret = DType(0); - Dot<DType, Lang>(&in, &one, &ret, ctx); + Dot<DType, Lang>(in, one, &ret, ctx); s = ret; }, {in.block(), one.block()}, {}); }); @@ -776,7 +762,7 @@ Tensor RowMax(const Tensor &in) { //size_t nrow = 1; //if (in.nDim() > 1) nrow = in.shape(0); //size_t ncol = in.Size() / nrow; - RowMax<DType, Lang>(&in, &ret, ctx); + RowMax<DType, Lang>(in, &ret, ctx); }, {in.block()}, {ret.block()}); }); return ret; @@ -1012,7 +998,7 @@ void MultColumn(const Tensor &v, Tensor *M) { CheckDataTypeAndLang(*M, v); TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, { v.device()->Exec([M, v](Context *ctx) { - DGMM<DType, Lang>(false, M, &v, + DGMM<DType, Lang>(false, *M, v, M, ctx); }, {M->block(), v.block()}, {M->block()}); }); @@ -1027,7 +1013,7 @@ void MultRow(const Tensor &v, Tensor *M) { CheckDataTypeAndLang(*M, v); TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, { v.device()->Exec([M, v](Context *ctx) { - DGMM<DType, Lang>(true, M, &v, + DGMM<DType, Lang>(true, *M, v, M, ctx); }, {M->block(), v.block()}, {M->block()}); }); @@ -1113,7 +1099,7 @@ void Axpy(const SType alpha, const Tensor &in, Tensor *out) { TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { auto a = TypeCast<SType, DType>(alpha); out->device()->Exec([a, in, out](Context *ctx) { - Axpy<DType, Lang>(a, &in, out, ctx); + Axpy<DType, Lang>(a, in, out, ctx); }, {in.block(), out->block()}, {out->block()}); }); } @@ -1143,7 +1129,7 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta, auto a = TypeCast<SType, DType>(alpha); auto b = TypeCast<SType, DType>(beta); C->device()->Exec([a, A, b, B, C](Context *ctx) { - GEMV<DType, Lang>(a, &A, &B, b, C, ctx); + GEMV<DType, Lang>(a, A, B, b, C, ctx); }, {A.block(), B.block()}, {C->block()}); }); } else { @@ -1152,7 +1138,7 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta, auto a = TypeCast<SType, DType>(alpha); auto b = TypeCast<SType, DType>(beta); C->device()->Exec([a, A, b, B, C](Context *ctx) { - GEMM<DType, Lang>(a, &A, &B, b, C, + GEMM<DType, Lang>(a, A, B, b, C, ctx); }, {A.block(), B.block()}, {C->block()}); }); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c52e2aa3/src/core/tensor/tensor_math.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h index c403f30..c7fdfe5 100644 --- a/src/core/tensor/tensor_math.h +++ b/src/core/tensor/tensor_math.h @@ -40,7 +40,7 @@ namespace singa { /// 4. Function argument names, use 'num' for total number of elements in /// elementwise operations; use 'in1' 'in2' for in Tensors; use 'out' for /// output Tensor or value. With exceptions for some functions, e.g., -/// Scale(const float alpha, const Tensor* in, Tensor* out); +/// Scale(const float alpha, const Tensor &in, Tensor* out); /// For such cases, use x, v, alpha, etc for scalar types. /// For blas functions, follow the blas style for argument names. /// Use 'M' and 'v' for matrix and vector tensors in functions involving both @@ -50,37 +50,6 @@ namespace singa { /// 7. Use size_t for the number of elements, rows or columns. /// 8. Use the same name for the Tensor and Tensor level math functions. 
-// template <typename DType> -// void TraverseUnary(const Tensor* in, Tensor* out, std::function<DType(DType)> func){} - -// template <typename DType> -// void TraverseBinary(const Tensor* in1, const Tensor* in2, Tensor* out, std::function<DType(DType, DType)> func){} - -template <typename DType> -void TraverseUnary(const Tensor* in, Tensor* out, std::function<DType(DType)> func){ - DType *outPtr = static_cast<DType *>(out->block()->mutable_data()); - const DType *inPtr = static_cast<const DType *>(in->block()->data()); - vector<int> traversal_info = in->generate_traversal_info(); - for (size_t i = 0; i < in->Size(); i++) { - outPtr[i] = func(inPtr[traversal_info[in->shape().size()]]); - in->traverse_next(traversal_info, i+1); - } -} - -template <typename DType> -void TraverseBinary(const Tensor* in1, const Tensor* in2, Tensor* out, std::function<DType(DType, DType)> func){ - DType *outPtr = static_cast<DType *>(out->block()->mutable_data()); - const DType *in1Ptr = static_cast<const DType *>(in1->block()->data()); - const DType *in2Ptr = static_cast<const DType *>(in2->block()->data()); - vector<int> traversal_info_in1 = in1->generate_traversal_info(); - vector<int> traversal_info_in2 = in2->generate_traversal_info(); - for (size_t i = 0; i < in1->Size(); i++) { - outPtr[i] = func(in1Ptr[traversal_info_in1[in1->shape().size()]], in2Ptr[traversal_info_in2[in2->shape().size()]]); - in1->traverse_next(traversal_info_in1, i+1); - in2->traverse_next(traversal_info_in2, i+1); - } -} - // ************************************** // Element-wise functions @@ -88,41 +57,41 @@ void TraverseBinary(const Tensor* in1, const Tensor* in2, Tensor* out, std::func /// out[i] = |in[i]| template <typename DType, typename Lang> -void Abs(const Tensor *in, Tensor *out, Context *ctx) { +void Abs(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Abs Not Implemented"; } /// out[i] = in[i] + x template <typename DType, typename Lang> -void Add(const Tensor *in, const DType x, Tensor *out, +void Add(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "Add Not Implemented"; } /// out[i] = in1[i] + in2[i] template <typename DType, typename Lang> -void Add(const Tensor *in1, const Tensor *in2, Tensor *out, +void Add(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Add-Pair Not Implemented"; } /// Clamp every element into [low, high] /// if in[i]>high, then out[i]=high; if in[i]<low, then out[i]=low. 
template <typename DType, typename Lang> -void Clamp(const DType low, const DType high, const Tensor *in, +void Clamp(const DType low, const DType high, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Clamp Not Implemented"; } /// out[i] = x / in[i] template <typename DType, typename Lang> -void Div(const DType x, const Tensor *in, Tensor *out, +void Div(const DType x, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Div Not Implemented"; } /// out[i] = in[i] / x template <typename DType, typename Lang> -void Div(const Tensor *in, const DType x, Tensor *out, +void Div(const Tensor &in, const DType x, Tensor *out, Context *ctx) { CHECK_NE(x, 0.f); EltwiseMult<DType, Lang>(in, DType(1) / x, out, ctx); @@ -130,101 +99,101 @@ void Div(const Tensor *in, const DType x, Tensor *out, /// out[i] = in1[i] / in2[i] template <typename DType, typename Lang> -void Div(const Tensor *in1, const Tensor *in2, Tensor *out, +void Div(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Div-Pair Not Implemented"; } /// out[i] = in[i] * x template <typename DType, typename Lang> -void EltwiseMult(const Tensor *in, const DType x, Tensor *out, +void EltwiseMult(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "EltwiseMult Not Implemented"; } /// out[i] = in1[i] * in2[i] template <typename DType, typename Lang> -void EltwiseMult(const Tensor *in1, const Tensor *in2, Tensor *out, +void EltwiseMult(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "EltwiseMult-Pair Not Implemented"; } /// Base is e, Neper number. out[i]=exp(in[i]) template <typename DType, typename Lang> -void Exp(const Tensor *in, Tensor *out, Context *ctx) { +void Exp(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Exp Not Implemented"; } /// out[i]=(in[i]<=x)?1.f:0.f template <typename DType, typename Lang> -void LE(const Tensor *in, const DType x, Tensor *out, +void LE(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "LE Not Implemented"; } /// out[i]=(in1[i]<=in2[i])?1.f:0.f template <typename DType, typename Lang> -void LE(const Tensor *in1, const Tensor *in2, Tensor *out, +void LE(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor LE Not Implemented"; } /// Natual logarithm, the base is e, Neper number out[i]=log(in[i]). 
template <typename DType, typename Lang> -void Log(const Tensor *in, Tensor *out, Context *ctx) { +void Log(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Log Not Implemented"; } /// out[i]=(in[i]<x)?1.f:0.f template <typename DType, typename Lang> -void LT(const Tensor *in, const DType x, Tensor *out, +void LT(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "LT Not Implemented"; } /// out[i]=(in1[i]<in2[i])?1.f:0.f template <typename DType, typename Lang> -void LT(const Tensor *in1, const Tensor *in2, Tensor *out, +void LT(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor LT Not Implemented"; } /// out[i]=(in[i]>=x)?1.f:0.f template <typename DType, typename Lang> -void GE(const Tensor *in, const DType x, Tensor *out, +void GE(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "GE Not Implemented"; } /// out[i]=(in1[i]>=in2[i])?1.f:0.f template <typename DType, typename Lang> -void GE(const Tensor *in1, const Tensor *in2, Tensor *out, +void GE(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor GE Not Implemented"; } /// out[i]=(in[i]>x)?1.f:0.f template <typename DType, typename Lang> -void GT(const Tensor *in, const DType x, Tensor *out, +void GT(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "GT Not Implemented"; } /// out[i]=(in[i]>in2[i])?1.f:0.f template <typename DType, typename Lang> -void GT(const Tensor *in, const Tensor *in2, Tensor *out, +void GT(const Tensor &in, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Tensor-Tensor GT Not Implemented"; } /// out[i] = pow(in[i], x) template <typename DType, typename Lang> -void Pow(const Tensor *in, const DType x, Tensor *out, +void Pow(const Tensor &in, const DType x, Tensor *out, Context *ctx) { LOG(FATAL) << "Pow Not Implemented"; } /// out[i]=pow(in1[i], in2[i]) template <typename DType, typename Lang> -void Pow(const Tensor *in1, const Tensor *in2, Tensor *out, +void Pow(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Pow-Pair Not Implemented"; } /// out[i]=max(0, in[i]) template <typename DType, typename Lang> -void ReLU(const Tensor *in, Tensor *out, Context *ctx) { +void ReLU(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "ReLU Not Implemented"; } @@ -235,50 +204,50 @@ void Set(const DType x, Tensor *out, Context *ctx) { } /// out[i]=sigmoid(in[i]) template <typename DType, typename Lang> -void Sigmoid(const Tensor *in, Tensor *out, Context *ctx) { +void Sigmoid(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Sigmoid Not Implemented"; } /// out[i] = sign(in[i]) template <typename DType, typename Lang> -void Sign(const Tensor *in, Tensor *out, Context *ctx) { +void Sign(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Sign Not Implemented"; } /// out[i]=sqrt(in[i]) template <typename DType, typename Lang> -void Sqrt(const Tensor *in, Tensor *out, Context *ctx) { +void Sqrt(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Sqrt Not Implemented"; } /// out[i]=square(in[i]) template <typename DType, typename Lang> -void Square(const Tensor *in, Tensor *out, Context *ctx) { +void Square(const Tensor &in, Tensor *out, Context *ctx) { EltwiseMult<DType, Lang>(in, in, out, ctx); } /// out[i] = in[i] - x template <typename DType, typename Lang> -void Sub(const Tensor *in, const DType x, Tensor *out, +void Sub(const Tensor &in, const DType x, Tensor 
*out, Context *ctx) { Add<DType, Lang>(in, -x, out, ctx); } /// out[i] = in1[i] - in2[i] template <typename DType, typename Lang> -void Sub(const Tensor *in1, const Tensor *in2, Tensor *out, +void Sub(const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Sub-Pair Not Implemented"; } /// sum all elements of in into out template <typename DType, typename Lang> -void Sum(const Tensor *in, DType *out, Context *ctx) { +void Sum(const Tensor &in, DType *out, Context *ctx) { LOG(FATAL) << "Sum Not Implemented"; } /// out[i]=tanh(in[i]) template <typename DType, typename Lang> -void Tanh(const Tensor *in, Tensor *out, Context *ctx) { +void Tanh(const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Tanh Not Implemented"; } @@ -313,31 +282,31 @@ void Uniform(const float low, const float high, Tensor *out, /// outurn the index of the element with the max value. template <typename DType, typename Lang> -void Amax(const Tensor *in, size_t *out, Context *ctx) { +void Amax(const Tensor &in, size_t *out, Context *ctx) { LOG(FATAL) << "Amax Not Implemented"; } /// outurn the index of the element with the min value. template <typename DType, typename Lang> -void Amin(const Tensor *in, size_t *out, Context *ctx) { +void Amin(const Tensor &in, size_t *out, Context *ctx) { LOG(FATAL) << "Amin Not Implemented"; } /// out = sum |x| for all x in in template <typename DType, typename Lang> -void Asum(const Tensor *in, DType *out, Context *ctx) { +void Asum(const Tensor &in, DType *out, Context *ctx) { LOG(FATAL) << "Asum Not Implemented"; } /// out = alpha * in + out template <typename DType, typename Lang> -void Axpy(const DType alpha, const Tensor *in, Tensor *out, +void Axpy(const DType alpha, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "Axpy Not Implemented"; } /// out = ||in||_2^2, i.e, L2 norm. 
template <typename DType, typename Lang> -void Nrm2(const Tensor *in, float *out, Context *ctx) { +void Nrm2(const Tensor &in, float *out, Context *ctx) { LOG(FATAL) << "Nrm2 Not Implemented"; } @@ -349,7 +318,7 @@ void Scale(const DType x, Tensor *out, Context *ctx) { /// inner product of array in1 and in2 template <typename DType, typename Lang> -void Dot(const Tensor *in1, const Tensor *in2, DType *out, +void Dot(const Tensor &in1, const Tensor &in2, DType *out, Context *ctx) { LOG(FATAL) << "Dot Not Implemented"; } @@ -358,7 +327,7 @@ void Dot(const Tensor *in1, const Tensor *in2, DType *out, /// transA indicates if the internal data layout is transposed of A template <typename DType, typename Lang> void GEMV(const DType alpha, - const Tensor *A, const Tensor *v, const DType beta, Tensor *out, + const Tensor &A, const Tensor &v, const DType beta, Tensor *out, Context *ctx) { LOG(FATAL) << "GEMV Not Implemented"; } @@ -367,7 +336,7 @@ void GEMV(const DType alpha, /// if matrix_lef_side is true, do M*v; else do v*M template <typename DType, typename Lang> void DGMM(const bool side_right, - const Tensor *M, const Tensor *v, Tensor *out, Context *ctx) { + const Tensor &M, const Tensor &v, Tensor *out, Context *ctx) { LOG(FATAL) << "DGMM Not Implemented"; } @@ -375,7 +344,7 @@ void DGMM(const bool side_right, /// transA indicates if the internal data layout is transposed of A template <typename DType, typename Lang> void GEMM(const DType alpha, - const Tensor *A, const Tensor *B, const DType beta, Tensor *C, + const Tensor &A, const Tensor &B, const DType beta, Tensor *C, Context *ctx) { LOG(FATAL) << "GEMM Not Implemented"; } @@ -396,7 +365,7 @@ void SoftmaxCrossEntropyBwd(bool int_target, const size_t batchsize, } template <typename DType, typename Lang> -void RowMax(const Tensor *in, Tensor *out, Context* ctx) { +void RowMax(const Tensor &in, Tensor *out, Context* ctx) { LOG(FATAL) << "Not Implemented"; } // ************************************** @@ -405,28 +374,28 @@ void RowMax(const Tensor *in, Tensor *out, Context* ctx) { /* /// Add the vector v to every column of A as the column of out template <typename DType, typename Lang> -void AddCol(const size_t nrow, const size_t ncol, const Tensor *A, const Tensor *v, +void AddCol(const size_t nrow, const size_t ncol, const Tensor &A, const Tensor &v, Tensor *out, Context *ctx) { LOG(FATAL) << "AddCol Not Implemented"; } // TODO(wangwei) unify AddRow and AddCol. /// Add the vector v to every row of A as the row of out template <typename DType, typename Lang> -void AddRow(const size_t nrow, const size_t ncol, const Tensor *A, const Tensor *v, +void AddRow(const size_t nrow, const size_t ncol, const Tensor &A, const Tensor &v, Tensor *out, Context *ctx) { LOG(FATAL) << "AddRow Not Implemented"; } /// outer-product. /// in1 and in2 are vectors of len m and n. 
out is matrix of shape m * n template <typename DType, typename Lang> -void Outer(const size_t m, const size_t n, const Tensor *in1, const Tensor *in2, +void Outer(const size_t m, const size_t n, const Tensor &in1, const Tensor &in2, Tensor *out, Context *ctx) { LOG(FATAL) << "Outer Not Implemented"; } /// Sum the columns of the in matrix into a vector template <typename DType, typename Lang> -void SumColumns(const size_t nrow, const size_t ncol, const Tensor *in, Tensor *out, +void SumColumns(const size_t nrow, const size_t ncol, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "SumColumns Not Implemented"; } @@ -438,10 +407,11 @@ void Set(const DType x, Tensor *out, Context *ctx) { // TODO(wangwei) unify SumRow and SumCol. /// Sum the rows of the in matrix into a vector template <typename DType, typename Lang> -void SumRows(const size_t nrow, const size_t ncol, const Tensor *in, Tensor *out, +void SumRows(const size_t nrow, const size_t ncol, const Tensor &in, Tensor *out, Context *ctx) { LOG(FATAL) << "SumRows Not Implemented"; } */ + } // namespace singa #endif // SINGA_CORE_MATH_H_
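The cudnn shape/stride padding removed from tensor.h above (generate_shape_cuda / generate_strides_cuda) moved with this commit into the math backends, presumably tensor_math_cuda.h, whose hunks this excerpt truncates. As a reference for the logic the removed comments describe, here is a standalone sketch under that assumption (hypothetical function names, not the SINGA API): cudnn descriptors require at least 4 dimensions, so lower-dimensional tensors are left-padded with shape entries of 1 and stride entries equal to the total element count, which never affect addressing.

```cpp
#include <cstdio>
#include <vector>

// Left-pad a shape to 4 dimensions with 1s, e.g. {3,3} -> {1,1,3,3}.
std::vector<int> PadShapeForCudnn(const std::vector<int> &shape) {
  std::vector<int> out(shape.size() < 4 ? 4 - shape.size() : 0, 1);
  out.insert(out.end(), shape.begin(), shape.end());
  return out;
}

// Left-pad strides with the total element count, e.g. shape {3,3} with
// strides {3,1} -> {9,9,3,1}, matching the removed tensor.h comment.
std::vector<int> PadStridesForCudnn(const std::vector<int> &shape,
                                    const std::vector<int> &strides) {
  int product = 1;
  for (int d : shape) product *= d;
  std::vector<int> out(shape.size() < 4 ? 4 - shape.size() : 0, product);
  out.insert(out.end(), strides.begin(), strides.end());
  return out;
}

int main() {
  for (int v : PadShapeForCudnn({3, 3})) std::printf("%d ", v);    // 1 1 3 3
  std::printf("| ");
  for (int v : PadStridesForCudnn({3, 3}, {3, 1})) std::printf("%d ", v);  // 9 9 3 1
}
```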
