SINGA-182 Clean math function APIs and implementations. Clean tensor.h/.cc, tensor_math.h and tensor_math_cpp.h: re-order the functions by (type, name), where type is a) element-wise function, b) matrix function, c) random function, d) BLAS function.
Implement GEMV using cblas and cublas. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/564c88ad Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/564c88ad Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/564c88ad Branch: refs/heads/master Commit: 564c88ad95e976e6067198c832f4fcd9a8878cd7 Parents: 07c49da Author: wangwei <[email protected]> Authored: Fri Jun 10 23:12:09 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Sun Jun 12 12:15:11 2016 +0800 ---------------------------------------------------------------------- include/singa/core/tensor.h | 396 +++++++++--------- src/core/tensor/tensor.cc | 688 ++++++++++++++++---------------- src/core/tensor/tensor_math.h | 336 ++++++++-------- src/core/tensor/tensor_math_cpp.h | 640 +++++++++++++++-------------- src/core/tensor/tensor_math_cuda.h | 158 ++++---- test/singa/test_tensor_math.cc | 15 +- 6 files changed, 1131 insertions(+), 1102 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/include/singa/core/tensor.h ---------------------------------------------------------------------- diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h index bb8d7f8..82bbe81 100644 --- a/include/singa/core/tensor.h +++ b/include/singa/core/tensor.h @@ -32,17 +32,6 @@ using std::tuple; namespace singa { typedef vector<size_t> Shape; -typedef Shape::iterator ShapeIter; -inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) { - if (len == 0) - len = shape.size(); - CHECK_LE(len, shape.size()); - size_t v = 1; - for (unsigned int i = start; i < len; i++) - v *= shape[i]; - return v; -} - /// hardcode the width of types defined in DataType const size_t kDataWidth[] = {sizeof(float), sizeof(float) / 2, sizeof(int), sizeof(char), sizeof(double)}; @@ -65,10 +54,10 @@ class 
Tensor { public: ~Tensor(); Tensor(); - explicit Tensor(Shape &&shape, DataType dtype = kFloat32); - explicit Tensor(const Shape &shape, DataType dtype = kFloat32); - Tensor(Shape &&shape, Device *dev, DataType dtype = kFloat32); - Tensor(const Shape &shape, Device *dev, DataType dtype = kFloat32); + explicit Tensor(Shape &&shape, const DataType dtype = kFloat32); + explicit Tensor(const Shape &shape, const DataType dtype = kFloat32); + Tensor(Shape &&shape, Device *dev, const DataType dtype = kFloat32); + Tensor(const Shape &shape, Device *dev, const DataType dtype = kFloat32); /// Copy Tensor to share the internal data. No deep copy. Tensor(const Tensor &from); @@ -82,10 +71,10 @@ class Tensor { Device *device() const { return device_; } - /// Return immutable Tensor values with given type. - template <typename DType> - DType data() const { - return static_cast<DType>(blob()->data()); + /// return immutable Tensor values with given type. + template <typename SType> + SType data() const { + return static_cast<SType>(blob()->data()); } /// data type, including kFloat16, kFloat32, kInt @@ -93,7 +82,7 @@ class Tensor { const Shape &shape() const { return shape_; } - const size_t shape(size_t idx) const { + const size_t shape(const size_t idx) const { CHECK_LT(idx, shape_.size()); return shape_.at(idx); } @@ -102,13 +91,13 @@ class Tensor { bool transpose() const { return transpose_; } - /// Return number of total elements + /// return number of total elements size_t Size() const { CHECK_EQ(blob_->size() % SizeOf(data_type_), 0u); return blob_->size() / SizeOf(data_type_); } - /// Return memory size (i.e., Bytes) + /// return memory size (i.e., Bytes) size_t MemSize() const { return blob_->size(); } /// Reset the tensor shape, it may reallocate blob, if MemSize() changes. @@ -121,7 +110,7 @@ class Tensor { void ResetLike(const Tensor &t); /// Reset the data type, it would reallocate blob if type changes. 
- void AsType(DataType type); + void AsType(const DataType type); /// Reset the device. /// If the target device is a diff device, then do deep data copy. @@ -135,14 +124,14 @@ class Tensor { void SetValue(const SType x); /// For init the tensor values, copy 'num' elements. - template <typename DType> - void CopyDataFromHostPtr(const DType *src, size_t num); + template <typename SType> + void CopyDataFromHostPtr(const SType *src, const size_t num); /// Copy data from another Tensor which may be on a diff device. /// Meta data would not be copied! void CopyData(const Tensor &other); - /// Return an exactly the same Tensor with data been deep copied. + /// return an exactly the same Tensor with data been deep copied. Tensor Clone() const; // Tensor operations @@ -152,42 +141,37 @@ class Tensor { Tensor T() const; /// Copy the meta info with data blob shared. - Tensor &operator=(const Tensor &t); + Tensor &operator=(const Tensor &in); /// Copy the meta info with data blob shared. - Tensor &operator=(Tensor &&t); + Tensor &operator=(Tensor &&in); - Tensor &operator+=(const Tensor &t); - // void operator+=(Tensor&& t); - Tensor &operator-=(const Tensor &t); - // void operator-=(Tensor&& t); - Tensor &operator*=(const Tensor &t); - // void operator*=(Tensor&& t); - Tensor &operator/=(const Tensor &t); - // void operator/=(Tensor&& t); + Tensor &operator+=(const Tensor &in); + // void operator+=(Tensor&& in); + Tensor &operator-=(const Tensor &in); + // void operator-=(Tensor&& in); + Tensor &operator*=(const Tensor &in); + // void operator*=(Tensor&& in); + Tensor &operator/=(const Tensor &in); + // void operator/=(Tensor&& in); // Scalar operations. 
- /// T is a scalar type - template <typename DType> - Tensor &operator+=(DType x); - - /// T is a scalar type - template <typename DType> - Tensor &operator-=(const DType x); + /// SType is a scalar type + template <typename SType> + Tensor &operator+=(const SType x); - /// T is a scalar type - template <typename DType> - Tensor &operator*=(const DType x); + /// SType is a scalar type + template <typename SType> + Tensor &operator-=(const SType x); - /// T is a scalar type - template <typename DType> - Tensor &operator/=(const DType x); + /// SType is a scalar type + template <typename SType> + Tensor &operator*=(const SType x); - /// save Tensor into a proto msg - // void ToProto(TensorProto* t); - /// load Tensor from proto msg - // void FromProto(const TensorProto& t); + /// SType is a scalar type + template <typename SType> + Tensor &operator/=(const SType x); protected: bool transpose_ = false; @@ -196,14 +180,29 @@ class Tensor { /// Note: blob_ is allocated in lazy manner to avoid frequent malloc/free. /// If you want to get an allocated Blob, use blob() instead of blob_. 
Blob *blob_ = nullptr; - Shape shape_; + Shape shape_ = {}; }; +typedef Shape::iterator ShapeIter; +inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) { + if (len == 0) len = shape.size(); + CHECK_LE(len, shape.size()); + size_t v = 1; + for (unsigned int i = start; i < len; i++) v *= shape[i]; + return v; +} + inline void CheckDataTypeAndLang(const Tensor &in1, const Tensor &in2) { CHECK_EQ(in1.data_type(), in2.data_type()); CHECK_EQ(in1.device()->lang(), in2.device()->lang()); } +template <typename FromType, typename ToType> +ToType TypeCast(const FromType &x) { + // TODO(wangwei) cast fp16; prevent some casts, e.g., float to char + return static_cast<ToType>(x); +} + Tensor Reshape(const Tensor &in, const Shape &s); Tensor Reshape(const Tensor &in, Shape &&s); @@ -212,192 +211,171 @@ Tensor Reshape(const Tensor &in, Shape &&s); /// Copy 'num' elements of src to dst. /// The first 'src_offset' ('dst_offset') elements will be skipped. -void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num, - size_t src_offset = 0, size_t dst_offset = 0); - -// ==================Simple Linear Algebra Operations========================= -Tensor Abs(const Tensor &t); -Tensor Exp(const Tensor &t); -Tensor Log(const Tensor &t); -Tensor ReLU(const Tensor &t); -Tensor Sigmoid(const Tensor &t); -Tensor Sign(const Tensor &t); -Tensor Sqrt(const Tensor &t); -Tensor Square(const Tensor &t); -Tensor Tanh(const Tensor &t); +void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num, + const size_t src_offset = 0, const size_t dst_offset = 0); + +// =============Element-wise operations==================================== +Tensor Abs(const Tensor &in); +Tensor Exp(const Tensor &in); +Tensor Log(const Tensor &in); +Tensor ReLU(const Tensor &in); +Tensor Sigmoid(const Tensor &in); +Tensor Sign(const Tensor &in); +Tensor Sqrt(const Tensor &in); +Tensor Square(const Tensor &in); +Tensor Tanh(const Tensor &in); + +/// Element-wise opeartion, out[i]=in[i]^x 
+template <typename SType> +Tensor Pow(const Tensor &in, const SType x); +/// Element-wise opeartion, out[i]=in[i]^x +template <typename SType> +void Pow(const Tensor &in, const SType x, Tensor *out); +/// Element-wise opeartion, out[i]=baes[i]^exp[i] +Tensor Pow(const Tensor &base, const Tensor &exp); +/// Element-wise opeartion, out[i]=baes[i]^exp[i] +void Pow(const Tensor &base, const Tensor &exp, Tensor *out); +/// Element-wise operation, out[i]= (in[i] < x) ? 1.f : 0.f template <typename SType> -SType Sum(const Tensor &t); -/// Sum elements in the Tensor, currently only support vector and matrix. -/// if 'axis' is 0, sum all rows into a single row -/// if 'axis' is 1, sum all columns into a single column -/// TODO(wangwei) support arbitrary Tensor like numpy.sum -Tensor Sum(const Tensor &t, int axis); +Tensor operator<(const Tensor &in, const SType x); +template <typename SType> +void LT(const Tensor &in, const SType x, Tensor *out); -/// Average elements in the Tensor, currently only support vector and matrix. -/// if 'axis' is 0, average all rows into a single row -/// if 'axis' is 1, average all columns into a single column -/// TODO(wangwei) support arbitrary Tensor like numpy.average -Tensor Average(const Tensor &t, int axis); -/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows, -/// and shape_[axis]*...*shape_[nDim()] columns. -/// and do softmax along each row. -Tensor SoftMax(const Tensor &t, int axis = 0); -void SoftMax(const Tensor &t, int axis, Tensor *ret); +/// Element-wise operation, out[i]= (in[i] <= x) ? 1.f : 0.f +template <typename SType> +Tensor operator<=(const Tensor &in, const SType x); +template <typename SType> +void LE(const Tensor &in, const SType x, Tensor *out); -/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows, -/// and shape_[axis+1]*...*shape_[nDim()] columns. -/// and do softmax along each row. 
-// Tensor Softmax(const Tensor& t, int axis = -1); -// void Softmax(const Tensor& t, Tensor* ret, int axis = -1); - -/// Element-wise operation, ret[i]= (t[i] < x) ? 1.f : 0.f -template <typename DType> -Tensor operator<(const Tensor &t, const DType x); -template <typename DType> -void LT(const Tensor &t, DType x, Tensor *ret); - -/// Element-wise operation, ret[i]= (t[i] <= x) ? 1.f : 0.f -template <typename DType> -Tensor operator<=(const Tensor &t, const DType x); -template <typename DType> -void LE(const Tensor &t, DType x, Tensor *ret); - -/// Element-wise operation, ret[i]= (t[i] > x) ? 1.f : 0.f -template <typename DType> -Tensor operator>(const Tensor &t, const DType x); -template <typename DType> -void GT(const Tensor &t, DType x, Tensor *ret); - -/// Element-wise operation, ret[i]= (t[i] >= x) ? 1.f : 0.f -template <typename DType> -Tensor operator>=(const Tensor &t, const DType x); -template <typename DType> -void GE(const Tensor &t, DType x, Tensor *ret); - -/// Element-wise opeartion, ret[i]=t[i]^x -template <typename DType> -Tensor Pow(const Tensor &t, DType x); -/// Element-wise opeartion, ret[i]=t[i]^x -template <typename DType> -void Pow(const Tensor &t, DType x, Tensor *ret); -/// Element-wise opeartion, ret[i]=baes[i]^exp[i] -Tensor Pow(const Tensor &base, Tensor exp); -/// Element-wise opeartion, ret[i]=baes[i]^exp[i] -void Pow(const Tensor &base, const Tensor &exp, Tensor *ret); +/// Element-wise operation, out[i]= (in[i] > x) ? 1.f : 0.f +template <typename SType> +Tensor operator>(const Tensor &in, const SType x); +template <typename SType> +void GT(const Tensor &in, const SType x, Tensor *out); + +/// Element-wise operation, out[i]= (in[i] >= x) ? 
1.f : 0.f +template <typename SType> +Tensor operator>=(const Tensor &in, const SType x); +template <typename SType> +void GE(const Tensor &in, const SType x, Tensor *out); Tensor operator+(const Tensor &lhs, const Tensor &rhs); -void Add(const Tensor &lhs, const Tensor &rhs, Tensor *ret); +void Add(const Tensor &lhs, const Tensor &rhs, Tensor *out); Tensor operator-(const Tensor &lhs, const Tensor &rhs); -void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *ret); +void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *out); Tensor operator*(const Tensor &lhs, const Tensor &rhs); -void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *ret); +void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *out); Tensor operator/(const Tensor &lhs, const Tensor &rhs); -void Div(const Tensor &lhs, const Tensor &rhs, Tensor *ret); +void Div(const Tensor &lhs, const Tensor &rhs, Tensor *out); -template <typename DType> -Tensor operator+(const Tensor &t, DType x); -template <typename DType> -void Add(const Tensor &t, DType x, Tensor *ret); - -template <typename DType> -Tensor operator-(const Tensor &t, DType x); -template <typename DType> -void Sub(const Tensor &t, DType x, Tensor *ret); - -template <typename DType> -Tensor operator*(const Tensor &t, DType x); -template <typename DType> -void EltwiseMult(const Tensor &t, DType x, Tensor *ret); - -template <typename DType> -Tensor operator/(const Tensor &t, DType x); -template <typename DType> -void Div(const Tensor &t, DType x, Tensor *ret); +template <typename SType> +Tensor operator+(const Tensor &in, const SType x); +template <typename SType> +void Add(const Tensor &in, const SType x, Tensor *out); -// ================Blas operations============================================ -// We fix the scalar argument type to be float. 
+template <typename SType> +Tensor operator-(const Tensor &in, const SType x); +template <typename SType> +void Sub(const Tensor &in, const SType x, Tensor *out); -// ===== Level 1 -// TODO(wangwei) make amax/amin/asum a member function of tensor -// void Amax(Tensor, Context* ctx); Get the index of the max value in a vector -// void Asum(Tensor Context* ctx); +template <typename SType> +Tensor operator*(const Tensor &in, const SType x); +template <typename SType> +void EltwiseMult(const Tensor &in, const SType x, Tensor *out); -// template <typename DType> -// void Axpy(DType x, const Blob& t, Blob* ret, Context* ctx); +/// For each element e of Tensor 'in', compute e / x +template <typename SType> +Tensor operator/(const Tensor &in, const SType x); +/// For each element e of Tensor 'in', compute e / x into out +template <typename SType> +void Div(const Tensor &in, const SType x, Tensor *out); -/// Do matrix vector multipication or matrix matrix multiplication depdending -/// on the Tensor shape. result = A * B -Tensor Mult(const Tensor &A, const Tensor &B); -/// Do matrix vector multipication or matrix matrix multiplication depdending -/// on the Tensor shape. C = A * B -void Mult(const Tensor &A, const Tensor &B, Tensor *C); +/// For each element e of Tensor 'in', compute x/e +template <typename SType> +Tensor Div(const SType x, const Tensor &in); +/// For each element e of Tensor 'in', compute x/e into 'out' +template <typename SType> +void Div(const SType x, const Tensor &in, Tensor *out); -/// Do matrix vector multipication or matrix matrix multiplication depdending -/// on the Tensor shape. ret = alpha lhs * rhs + beta * ret -void Mult(const float alpha, const Tensor &lhs, const Tensor &rhs, - const float beta, Tensor *C); +template <typename SType> +SType Sum(const Tensor &in); -// ================Random operations========================================== -/// For each element x set x = 1 if random() < p; otherwise x = 1. 
-void Bernoulli(float p, Tensor *t); -/// Fill in Tensor 't' following uniform distribution. -void Uniform(float low, float high, Tensor *t); -/// Fill in Tensor 't' following Gaussian distribution. -void Gaussian(float mean, float std, Tensor *t); +// ============Matrix (row/column) operations================================== +/// Average elements in the Tensor, currently only support vector and matrix. +/// if 'axis' is 0, average all rows into a single row +/// if 'axis' is 1, average all columns into a single column +/// TODO(wangwei) support arbitrary Tensor like numpy.average +Tensor Average(const Tensor &in, const int axis); +/// Sum elements in the Tensor, currently only support vector and matrix. +/// if 'axis' is 0, sum all rows into a single row +/// if 'axis' is 1, sum all columns into a single column +/// TODO(wangwei) support arbitrary Tensor like numpy.sum +Tensor Sum(const Tensor &in, const int axis); +/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows, +/// and shape_[axis]*...*shape_[nDim()] columns. +/// and do softmax along each row. 
+Tensor SoftMax(const Tensor &in, const int axis = 0); +void SoftMax(const Tensor &in, const int axis, Tensor *out); -// follow the consistency guide -// https://issues.apache.org/jira/browse/SINGA-182 -// ============Matrix vector operations======================================= /// Add column 'v' with each column of matrix M void AddColumn(const Tensor &v, Tensor *M); -void AddColumn(const float alpha, const float beta, const Tensor &v, +/// For each column 'c' of matrix out, do c=alpha*v + beta*c +template <typename SType> +void AddColumn(const SType alpha, const SType beta, const Tensor &v, Tensor *out); -/// Sub column 'v' by each column of matrix M -void SubColumn(const Tensor &v, Tensor *M); -/// Multiply column 'v' and each column of matrix M; write results into 'out' -void MultColumn(const Tensor &v, Tensor *M); -/// Divide column 'v' by each column of matrix M; write results into 'out' -void DivColumn(const Tensor &v, Tensor *M); - /// Add row 'v' with each row of matrix M; write results into 'out' void AddRow(const Tensor &v, Tensor *out); -void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M); -/// Sub row 'v' by each row of matrix M; write results into 'out' -void SubRow(const Tensor &v, Tensor *M); -/// Multiply row 'v' with each row of matrix M; write results into 'out' -void MultRow(const Tensor &v, Tensor *M); +/// For each row 'r' of matrix out, do r=alpha*v + beta*r +template <typename SType> +void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M); +/// Divide column 'v' by each column of matrix M; write results into 'out' +void DivColumn(const Tensor &v, Tensor *M); /// Divide row 'v' by each row of matrix M; write results into 'out' void DivRow(const Tensor &v, Tensor *M); - -/// Sum all rows of matrix M into a single row as 'out' -void SumRows(const Tensor &M, Tensor *out); +/// Multiply column 'v' and each column of matrix M; write results into 'out' +void MultColumn(const Tensor &v, Tensor *M); 
+/// Multiply row 'v' with each row of matrix M; write results into 'out' +void MultRow(const Tensor &v, Tensor *M); +/// Sub column 'v' by each column of matrix M +void SubColumn(const Tensor &v, Tensor *M); +/// Sub row 'v' by each row of matrix M; write results into 'out' +void SubRow(const Tensor &v, Tensor *M); /// Sum all columns of matrix M into a single column as 'out' void SumColumns(const Tensor &M, Tensor *out); +/// Sum all rows of matrix M into a single row as 'out' +void SumRows(const Tensor &M, Tensor *out); -/// For each element x of Tensor 'in', compute alpha/x +// ================Random operations========================================== +/// For each element x set x = 1 if random() < p; otherwise x = 1. template <typename SType> -Tensor Div(const SType alpha, const Tensor &in); +void Bernoulli(const SType p, Tensor *out); +/// Fill in Tensor 't' following Gaussian distribution. +template <typename SType> +void Gaussian(const SType mean, const SType std, Tensor *out); +/// Fill in Tensor 't' following uniform distribution. 
+template <typename SType> +void Uniform(const SType low, const SType high, Tensor *out); -/// For each element x of Tensor 'in', compute alpha/x into 'out' +// ================Blas operations============================================ +// TODO(wangwei) make amax/amin/asum a member function of tensor + +/// out = alpha*in + out template <typename SType> -void Div(const SType alpha, const Tensor &in, Tensor *out); - -/* -/// Multiply each column of the lhs matrix with the rhs column -Tensor MultColumn(const Tensor &lhs, const Tensor &rhs); -void MultColumn(const Tensor &lhs, const Tensor &rhs, Tensor *ret); -/// Multiply each row of the lhs matrix with the rhs row -Tensor MultRow(const Tensor &lhs, const Tensor &rhs); -void MultRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret); -/// Div each row of the lhs matrix with the rhs column -Tensor DivColumn(const Tensor &lhs, const Tensor &rhs); -void DivColumn(const Tensor &lhs, const Tensor &rhs, Tensor *ret); -/// Divide each row of the lhs matrix by the rhs row -Tensor DivRow(const Tensor &lhs, const Tensor &rhs); -void DivRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret); -*/ +void Axpy(SType alpha, const Tensor &in, Tensor *out); + +/// Do matrix vector multipication or matrix matrix multiplication depdending +/// on the Tensor shape. result = A * B +Tensor Mult(const Tensor &A, const Tensor &B); +/// Do matrix vector multipication or matrix matrix multiplication depdending +/// on the Tensor shape. C = A * B +void Mult(const Tensor &A, const Tensor &B, Tensor *C); +/// Do matrix vector multipication or matrix matrix multiplication depdending +/// on the Tensor shape. 
out = alpha lhs * rhs + beta * out +template <typename SType> +void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta, + Tensor *C); } // namespace singa #endif // SINGA_CORE_TENSOR_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/src/core/tensor/tensor.cc ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc index 5ae375c..f4e9da2 100644 --- a/src/core/tensor/tensor.cc +++ b/src/core/tensor/tensor.cc @@ -26,61 +26,61 @@ namespace singa { Tensor::~Tensor() { // LOG(ERROR) << "~"; - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = nullptr; } Tensor::Tensor() { device_ = &defaultDevice; } -Tensor::Tensor(const Shape &shape, DataType dtype) +Tensor::Tensor(const Shape &shape, const DataType dtype) : data_type_(dtype), device_(&defaultDevice), shape_(shape) { device_ = &defaultDevice; blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } -Tensor::Tensor(Shape &&shape, DataType dtype) +Tensor::Tensor(Shape &&shape, const DataType dtype) : data_type_(dtype), device_(&defaultDevice), shape_(shape) { device_ = &defaultDevice; blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } -Tensor::Tensor(const Shape &shape, Device *device, DataType dtype) +Tensor::Tensor(const Shape &shape, Device *device, const DataType dtype) : data_type_(dtype), device_(device), shape_(shape) { blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } -Tensor::Tensor(Shape &&shape, Device *device, DataType dtype) +Tensor::Tensor(Shape &&shape, Device *device, const DataType dtype) : data_type_(dtype), device_(device), shape_(shape) { blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_)); } -Tensor::Tensor(const Tensor &t) - : transpose_(t.transpose_), data_type_(t.data_type_), device_(t.device_), - 
blob_(t.blob()), shape_(t.shape_) { +Tensor::Tensor(const Tensor &in) + : transpose_(in.transpose_), + data_type_(in.data_type_), + device_(in.device_), + blob_(in.blob()), + shape_(in.shape_) { blob_->IncRefCount(); - // LOG(ERROR) << "const&"; } -Tensor::Tensor(Tensor &&t) - : transpose_(t.transpose_), data_type_(t.data_type_), device_(t.device_), - shape_(std::move(t.shape_)) { - blob_ = t.blob_; - t.blob_ = nullptr; - // LOG(ERROR) << "&&"; +Tensor::Tensor(Tensor &&in) + : transpose_(in.transpose_), + data_type_(in.data_type_), + device_(in.device_), + shape_(std::move(in.shape_)) { + blob_ = in.blob_; + in.blob_ = nullptr; } -void Tensor::ResetLike(const Tensor &t) { - if (blob_ == nullptr || device_ != t.device_ || MemSize() != t.MemSize()) { - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); - shape_ = t.shape_; - device_ = t.device_; - data_type_ = t.data_type_; - blob_ = device_->NewBlob(t.MemSize()); +void Tensor::ResetLike(const Tensor &in) { + if (blob_ == nullptr || device_ != in.device_ || MemSize() != in.MemSize()) { + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); + shape_ = in.shape_; + device_ = in.device_; + data_type_ = in.data_type_; + blob_ = device_->NewBlob(in.MemSize()); } } void Tensor::Reshape(const Shape &shape) { if (Product(shape_) != Product(shape)) { - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_)); } shape_ = shape; @@ -88,17 +88,15 @@ void Tensor::Reshape(const Shape &shape) { void Tensor::Reshape(Shape &&shape) { if (Product(shape_) != Product(shape)) { - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_)); } shape_ = std::move(shape); } 
-void Tensor::AsType(DataType type) { +void Tensor::AsType(const DataType type) { if (data_type_ != type) { - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = device_->NewBlob(Product(shape_) * SizeOf(type)); data_type_ = type; } @@ -109,8 +107,7 @@ void Tensor::ToDevice(Device *dst) { if (device_ != dst) { Tensor tmp(shape_, dst, data_type_); tmp.CopyData(*this); - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = tmp.blob_; tmp.blob_ = nullptr; device_ = dst; @@ -120,7 +117,7 @@ void Tensor::ToDevice(Device *dst) { void Tensor::ToHost() { ToDevice(device_->host()); } template <typename DType> -void Tensor::CopyDataFromHostPtr(const DType *src, size_t num) { +void Tensor::CopyDataFromHostPtr(const DType *src, const size_t num) { CHECK_EQ(sizeof(DType), SizeOf(data_type_)) << "data_type is " << DataType_Name(data_type_) << " user given type is of size " << sizeof(DType); @@ -130,8 +127,8 @@ void Tensor::CopyDataFromHostPtr(const DType *src, size_t num) { LOG(WARNING) << "Copy data from null host ptr"; } } -template void Tensor::CopyDataFromHostPtr(const float *src, size_t num); -template void Tensor::CopyDataFromHostPtr(const int *src, size_t num); +template void Tensor::CopyDataFromHostPtr(const float *src, const size_t num); +template void Tensor::CopyDataFromHostPtr(const int *src, const size_t num); void Tensor::CopyData(const Tensor &src) { CHECK_EQ(Size(), src.Size()); @@ -162,29 +159,27 @@ Tensor Tensor::T() const { return t; } -Tensor &Tensor::operator=(const Tensor &t) { +Tensor &Tensor::operator=(const Tensor &in) { // LOG(ERROR) << "= const &"; - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); - transpose_ = t.transpose_; - data_type_ = t.data_type_; - shape_ = t.shape_; - device_ = t.device_; - 
blob_ = t.blob(); + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); + transpose_ = in.transpose_; + data_type_ = in.data_type_; + shape_ = in.shape_; + device_ = in.device_; + blob_ = in.blob(); blob_->IncRefCount(); return *this; } -Tensor &Tensor::operator=(Tensor &&t) { +Tensor &Tensor::operator=(Tensor &&in) { // LOG(ERROR) << "= &&"; - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); - transpose_ = t.transpose_; - data_type_ = t.data_type_; - shape_ = std::move(t.shape_); - device_ = t.device_; - blob_ = t.blob_; - t.blob_ = nullptr; + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); + transpose_ = in.transpose_; + data_type_ = in.data_type_; + shape_ = std::move(in.shape_); + device_ = in.device_; + blob_ = in.blob_; + in.blob_ = nullptr; return *this; } @@ -200,10 +195,10 @@ Tensor Reshape(const Tensor &in, Shape &&s) { return out; } -#define GenUnaryTensorArgMemberFn(op, fn) \ - Tensor &Tensor::op(const Tensor &t) { \ - fn(*this, t, this); \ - return *this; \ +#define GenUnaryTensorArgMemberFn(op, fn) \ + Tensor &Tensor::op(const Tensor &in) { \ + fn(*this, in, this); \ + return *this; \ } GenUnaryTensorArgMemberFn(operator+=, Add); @@ -211,12 +206,13 @@ GenUnaryTensorArgMemberFn(operator-=, Sub); GenUnaryTensorArgMemberFn(operator*=, EltwiseMult); GenUnaryTensorArgMemberFn(operator/=, Div); -#define GenUnaryScalarArgMemberFn(op, fn) \ - template <typename DType> Tensor &Tensor::op(DType x) { \ - fn(*this, x, this); \ - return *this; \ - } \ - template Tensor &Tensor::op<float>(float x) +#define GenUnaryScalarArgMemberFn(op, fn) \ + template <typename DType> \ + Tensor &Tensor::op(const DType x) { \ + fn(*this, x, this); \ + return *this; \ + } \ + template Tensor &Tensor::op<float>(const float x) GenUnaryScalarArgMemberFn(operator-=, Sub); GenUnaryScalarArgMemberFn(operator+=, Add); @@ -224,103 +220,105 @@ GenUnaryScalarArgMemberFn(operator*=, EltwiseMult); 
GenUnaryScalarArgMemberFn(operator/=, Div); // ====================Tensor Operations======================================= -void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num, - size_t dst_offset, size_t src_offset) { +void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num, + const size_t dst_offset, const size_t src_offset) { auto width = SizeOf(src.data_type()); CHECK_EQ(width, SizeOf(dst->data_type())); size_t nBytes = num * width; - dst_offset *= width; - src_offset *= width; - CHECK_GE(src.MemSize(), src_offset + nBytes); - CHECK_GE(dst->MemSize(), dst_offset + nBytes); + auto d_offset = dst_offset * width; + auto s_offset = src_offset * width; + CHECK_GE(src.MemSize(), s_offset + nBytes); + CHECK_GE(dst->MemSize(), d_offset + nBytes); Device *src_dev = src.device(), *dst_dev = dst->device(); Blob *from = src.blob(), *to = dst->blob(); if (dst_dev->lang() != src_dev->lang()) { // let the none cpp device conduct copy op if (dst_dev->lang() == kCpp) { - src_dev->CopyDataToFrom(to, from, nBytes, kDeviceToHost, dst_offset, - src_offset); + src_dev->CopyDataToFrom(to, from, nBytes, kDeviceToHost, d_offset, + s_offset); } else if (src_dev->lang() == kCpp) { - dst_dev->CopyDataToFrom(to, from, nBytes, kHostToDevice, dst_offset, - src_offset); + dst_dev->CopyDataToFrom(to, from, nBytes, kHostToDevice, d_offset, + s_offset); } else { LOG(FATAL) << "Not support mem copy betwee Cuda and OpenCL device"; } } else { auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice; - src_dev->CopyDataToFrom(to, from, nBytes, direct, dst_offset, src_offset); + src_dev->CopyDataToFrom(to, from, nBytes, direct, d_offset, s_offset); } } //============================================================================ /// typedef DType accroding to type value. /// DType would be used in the code block __VA_ARGS__. -#define TYPE_SWITCH(type, DType, ...) 
\ - do { \ - switch (type) { \ - case kFloat32: { \ - typedef float DType; \ - { __VA_ARGS__ } \ - break; \ - } \ - case kInt: { \ - typedef int DType; \ - { __VA_ARGS__ } \ - break; \ - } \ - case kChar: { \ - typedef char DType; \ - { __VA_ARGS__ } \ - break; \ - } \ - default: \ - LOG(FATAL) << "Unknow data type = " << DataType_Name(type); \ - } \ +#define TYPE_SWITCH(type, DType, ...) \ + do { \ + switch (type) { \ + case kFloat32: { \ + typedef float DType; \ + { __VA_ARGS__ } \ + break; \ + } \ + case kInt: { \ + typedef int DType; \ + { __VA_ARGS__ } \ + break; \ + } \ + case kChar: { \ + typedef char DType; \ + { __VA_ARGS__ } \ + break; \ + } \ + default: \ + LOG(FATAL) << "Unknow data type = " << DataType_Name(type); \ + } \ } while (0) /// typedef DType and Lang according to data type and device programming /// language respectively. /// type is from DataType, and lang is from LangType. /// DType and Lang would be used in __VA_ARGS__. -#define TYPE_LANG_SWITCH(dtype, DType, ltype, Lang, ...) \ - do { \ - const int _SwitchShift = 3; \ - int _SwitchHash = ((dtype) << _SwitchShift) + (ltype); \ - switch (_SwitchHash) { \ - case ((kFloat32 << _SwitchShift) + kCuda): { \ - typedef float DType; \ - typedef lang::Cuda Lang; \ - { __VA_ARGS__ } \ - break; \ - } \ - case ((kFloat32 << _SwitchShift) + kCpp): { \ - typedef float DType; \ - typedef lang::Cpp Lang; \ - { __VA_ARGS__ } \ - break; \ - } \ - case ((kFloat32 << _SwitchShift) + kOpencl): { \ - typedef float DType; \ - typedef lang::Opencl Lang; \ - { __VA_ARGS__ } \ - break; \ - } \ - default: \ - LOG(FATAL) << "Unknown combination of data type " \ - << DataType_Name(dtype) << " and language " \ - << LangType_Name(ltype); \ - } \ +#define TYPE_LANG_SWITCH(dtype, DType, ltype, Lang, ...) 
\ + do { \ + const int _SwitchShift = 3; \ + int _SwitchHash = ((dtype) << _SwitchShift) + (ltype); \ + switch (_SwitchHash) { \ + case ((kFloat32 << _SwitchShift) + kCuda): { \ + typedef float DType; \ + typedef lang::Cuda Lang; \ + { __VA_ARGS__ } \ + break; \ + } \ + case ((kFloat32 << _SwitchShift) + kCpp): { \ + typedef float DType; \ + typedef lang::Cpp Lang; \ + { __VA_ARGS__ } \ + break; \ + } \ + case ((kFloat32 << _SwitchShift) + kOpencl): { \ + typedef float DType; \ + typedef lang::Opencl Lang; \ + { __VA_ARGS__ } \ + break; \ + } \ + default: \ + LOG(FATAL) << "Unknown combination of data type " \ + << DataType_Name(dtype) << " and language " \ + << LangType_Name(ltype); \ + } \ } while (0) -template <typename SType> void Tensor::SetValue(const SType x) { +// =============Element-wise operations==================================== +template <typename SType> +void Tensor::SetValue(const SType x) { CHECK_EQ(sizeof(SType), SizeOf(data_type_)); auto size = Size(); auto ptr = blob_; TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, { // cast x to DType - device_->Exec( - [size, x, ptr](Context *ctx) { Set<DType, Lang>(size, x, ptr, ctx); }, - {}, {ptr}); + device_->Exec([size, x, ptr](Context *ctx) { + Set<DType, Lang>(size, x, ptr, ctx); + }, {}, {ptr}); }); } template void Tensor::SetValue<float>(const float x); @@ -328,21 +326,19 @@ template void Tensor::SetValue<float>(const float x); #define EltwiseUnaryTensorFn(fn, t, ret) \ do { \ TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \ - ret->device()->Exec( \ - [t, ret](Context* ctx) { \ - fn<DType, Lang>(t.Size(), t.blob(), ret->blob(), ctx); \ - }, \ - {t.blob()}, {ret->blob()}); \ + ret->device()->Exec([t, ret](Context * ctx) { \ + fn<DType, Lang>(t.Size(), t.blob(), ret->blob(), ctx); \ + }, {t.blob()}, {ret->blob()}); \ }); \ } while (0) -#define GenUnaryTensorFn(fn) \ - Tensor fn(const Tensor &t) { \ - Tensor ret(t.shape(), t.device(), t.data_type()); \ - auto *retptr = 
&ret; \ - EltwiseUnaryTensorFn(fn, t, retptr); \ - return ret; \ - } \ +#define GenUnaryTensorFn(fn) \ + Tensor fn(const Tensor &in) { \ + Tensor ret(in.shape(), in.device(), in.data_type()); \ + auto *retptr = &ret; \ + EltwiseUnaryTensorFn(fn, in, retptr); \ + return ret; \ + } \ void fn(const Tensor &in, Tensor *out) { EltwiseUnaryTensorFn(fn, in, out); } GenUnaryTensorFn(Abs); @@ -355,33 +351,89 @@ GenUnaryTensorFn(Sqrt); GenUnaryTensorFn(Square); GenUnaryTensorFn(Tanh); -// TODO(wangwei) conside async exec -template <> float Sum<float>(const Tensor &t) { - float s = 0.0f; - TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { - t.device()->Exec( - [t, &s](Context *ctx) { - Sum<DType, Lang>(t.Size(), t.blob(), &s, ctx); - }, - {t.blob()}, {}); - }); - return s; -} +#define EltwiseBinaryTensorFn(fn, lhs, rhs, ret) \ + do { \ + TYPE_LANG_SWITCH(lhs.data_type(), DType, lhs.device()->lang(), Lang, { \ + CHECK_EQ(sizeof(DType), SizeOf(rhs.data_type())); \ + ret->device()->Exec([lhs, rhs, ret](Context * ctx) { \ + fn<DType, Lang>(lhs.Size(), lhs.blob(), rhs.blob(), ret->blob(), ctx); \ + }, {lhs.blob(), rhs.blob()}, {ret->blob()}); \ + }); \ + } while (0) -Tensor Sum(const Tensor &M, int axis) { - if (axis == 0) { - Tensor out(Shape{M.shape(1)}, M.device(), M.data_type()); - SumRows(M, &out); - return out; - } else { - CHECK_EQ(axis, 1) << "Not support Sum over axis = " << axis; - Tensor out(Shape{M.shape(0)}, M.device(), M.data_type()); - SumColumns(M, &out); - return out; +#define GenBinaryTensorFn(op, fn) \ + Tensor op(const Tensor &lhs, const Tensor &rhs) { \ + Tensor ret(lhs.shape(), lhs.device(), lhs.data_type()); \ + fn(lhs, rhs, &ret); \ + return ret; \ + } \ + void fn(const Tensor &lhs, const Tensor &rhs, Tensor *ret) { \ + EltwiseBinaryTensorFn(fn, lhs, rhs, ret); \ } + +GenBinaryTensorFn(operator+, Add); +GenBinaryTensorFn(operator-, Sub); +GenBinaryTensorFn(operator*, EltwiseMult); +GenBinaryTensorFn(operator/, Div); +GenBinaryTensorFn(Pow, 
Pow); + +#define EltwiseTensorScalarFn(fn, t, x, ret) \ + do { \ + TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \ + static_assert(std::is_same<SType, DType>::value, \ + "The Scalar type must match the Tensor data type"); \ + ret->device()->Exec([t, x, ret](Context * ctx) { \ + fn<DType, Lang>(t.Size(), t.blob(), x, ret->blob(), ctx); \ + }, {t.blob()}, {ret->blob()}); \ + }); \ + } while (0) + +#define GenTensorScalarFn(op, fn) \ + template <typename SType> \ + Tensor op(const Tensor &in, const SType x) { \ + Tensor ret(in.shape(), in.device(), in.data_type()); \ + fn(in, x, &ret); \ + return ret; \ + } \ + template <typename SType> \ + void fn(const Tensor &in, const SType x, Tensor *ret) { \ + EltwiseTensorScalarFn(fn, in, x, ret); \ + } \ + template Tensor op<float>(const Tensor &in, const float x); \ + template void fn<float>(const Tensor &in, const float x, Tensor *ret) + +GenTensorScalarFn(operator+, Add); +GenTensorScalarFn(operator-, Sub); +GenTensorScalarFn(operator*, EltwiseMult); +GenTensorScalarFn(operator/, Div); +GenTensorScalarFn(Pow, Pow); +GenTensorScalarFn(operator<, LT); +GenTensorScalarFn(operator<=, LE); +GenTensorScalarFn(operator>, GT); +GenTensorScalarFn(operator>=, GE); +template <typename SType> +Tensor Div(const SType alpha, const Tensor &in) { + Tensor out(in.shape(), in.device(), in.data_type()); + Div(alpha, in, &out); + return out; } +template Tensor Div<float>(const float, const Tensor &); -Tensor Average(const Tensor &t, int axis) { +template <typename SType> +void Div(const SType alpha, const Tensor &in, Tensor *out) { + CheckDataTypeAndLang(in, *out); + CHECK(in.shape() == out->shape()); + TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { + // TODO(wangwei) type cast SType to DType; + in.device()->Exec([alpha, in, out](Context *ctx) { + Div<DType, Lang>(in.Size(), alpha, in.blob(), out->blob(), ctx); + }, {in.blob()}, {out->blob()}); + }); +} +template void Div<float>(const float, const 
Tensor &, Tensor *); + +// =============Matrix operations============================================ +Tensor Average(const Tensor &M, int axis) { // operator/ only has implementation for float scalar type, hence it is // necessary to cast the denominator to a float. // TODO(wangwei) implement function for cast scalar type involved in Tensor @@ -396,10 +448,34 @@ Tensor Average(const Tensor &t, int axis) { // .... // } if (axis == 0) { - return Sum(t, 0) / (1.0f * t.shape().at(0)); + return Sum(M, 0) / (1.0f * M.shape(0)); } else { CHECK_EQ(axis, 1); - return Sum(t, 1) / (1.0f * t.shape().at(1)); + return Sum(M, 1) / (1.0f * M.shape(1)); + } +} +// TODO(wangwei) conside async exec +template <> +float Sum<float>(const Tensor &in) { + float s = 0.0f; + TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { + in.device()->Exec([in, &s](Context *ctx) { + Sum<DType, Lang>(in.Size(), in.blob(), &s, ctx); + }, {in.blob()}, {}); + }); + return s; +} + +Tensor Sum(const Tensor &M, int axis) { + if (axis == 0) { + Tensor out(Shape{M.shape(1)}, M.device(), M.data_type()); + SumRows(M, &out); + return out; + } else { + CHECK_EQ(axis, 1) << "Not support Sum over axis = " << axis; + Tensor out(Shape{M.shape(0)}, M.device(), M.data_type()); + SumColumns(M, &out); + return out; } } @@ -424,141 +500,10 @@ void SoftMax(const Tensor &in, int axis, Tensor *out) { DivColumn(sum, out); } -#define EltwiseBinaryTensorFn(fn, lhs, rhs, ret) \ - do { \ - TYPE_LANG_SWITCH(lhs.data_type(), DType, lhs.device()->lang(), Lang, { \ - CHECK_EQ(sizeof(DType), SizeOf(rhs.data_type())); \ - ret->device()->Exec( \ - [lhs, rhs, ret](Context *ctx) { \ - fn<DType, Lang>(lhs.Size(), lhs.blob(), rhs.blob(), ret->blob(), \ - ctx); \ - }, \ - {lhs.blob(), rhs.blob()}, {ret->blob()}); \ - }); \ - } while (0) - -#define GenBinaryTensorFn(op, fn) \ - Tensor op(const Tensor &lhs, const Tensor &rhs) { \ - Tensor ret(lhs.shape(), lhs.device(), lhs.data_type()); \ - fn(lhs, rhs, &ret); \ - return ret; 
\ - } \ - void fn(const Tensor &lhs, const Tensor &rhs, Tensor *ret) { \ - EltwiseBinaryTensorFn(fn, lhs, rhs, ret); \ - } - -GenBinaryTensorFn(operator+, Add); -GenBinaryTensorFn(operator-, Sub); -GenBinaryTensorFn(operator*, EltwiseMult); -GenBinaryTensorFn(operator/, Div); -GenBinaryTensorFn(Pow, Pow); - -#define EltwiseTensorScalarFn(fn, t, x, ret) \ - do { \ - TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \ - static_assert(std::is_same<SType, DType>::value, \ - "The Scalar type must match the Tensor data type"); \ - ret->device()->Exec( \ - [t, x, ret](Context *ctx) { \ - fn<DType, Lang>(t.Size(), t.blob(), x, ret->blob(), ctx); \ - }, \ - {t.blob()}, {ret->blob()}); \ - }); \ - } while (0) - -#define GenTensorScalarFn(op, fn) \ - template <typename SType> Tensor op(const Tensor &t, SType x) { \ - Tensor ret(t.shape(), t.device(), t.data_type()); \ - fn(t, x, &ret); \ - return ret; \ - } \ - template <typename SType> void fn(const Tensor &t, SType x, Tensor *ret) { \ - EltwiseTensorScalarFn(fn, t, x, ret); \ - } \ - template Tensor op<float>(const Tensor &t, float x); \ - template void fn<float>(const Tensor &t, const float x, Tensor *ret) - -GenTensorScalarFn(operator+, Add); -GenTensorScalarFn(operator-, Sub); -GenTensorScalarFn(operator*, EltwiseMult); -GenTensorScalarFn(operator/, Div); -GenTensorScalarFn(Pow, Pow); -GenTensorScalarFn(operator<, LT); -GenTensorScalarFn(operator<=, LE); -GenTensorScalarFn(operator>, GT); -GenTensorScalarFn(operator>=, GE); - -// ================Blas operations============================================ -Tensor Mult(const Tensor &lhs, const Tensor &rhs) { - Tensor ret(Shape{lhs.shape(0), rhs.shape(1)}, lhs.device(), lhs.data_type()); - Mult(lhs, rhs, &ret); - return ret; -} - -void Mult(const Tensor &lhs, const Tensor &rhs, Tensor *ret) { - Mult(1.0f, lhs, rhs, 0.0f, ret); -} - -void Mult(const float alpha, const Tensor &A, const Tensor &B, const float beta, - Tensor *C) { - CHECK_EQ(A.shape().size(), 
2u); - if (B.nDim() == 1u) { - TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, { - C->device()->Exec( - [alpha, A, beta, B, C](Context *ctx) { - GEMV<DType, Lang>(A.transpose(), A.shape(0), A.shape(1), alpha, - A.blob(), B.blob(), beta, C->blob(), ctx); - }, - {A.blob(), B.blob()}, {C->blob()}); - }); - } else { - CHECK(!C->transpose()); - TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, { - C->device()->Exec( - [alpha, A, beta, B, C](Context *ctx) { - GEMM<DType, Lang>(A.transpose(), B.transpose(), A.shape(0), - B.shape(1), A.shape(1), alpha, A.blob(), B.blob(), - beta, C->blob(), ctx); - }, - {A.blob(), B.blob()}, {C->blob()}); - }); - } -} - -void Bernoulli(float p, Tensor *t) { - TYPE_LANG_SWITCH(t->data_type(), DType, t->device()->lang(), Lang, { - t->device()->Exec( - [p, t](Context *ctx) { - Bernoulli<DType, Lang>(t->Size(), p, t->blob(), ctx); - }, - {}, {t->blob()}, true); - }); -} - -void Uniform(float low, float high, Tensor *t) { - TYPE_LANG_SWITCH(t->data_type(), DType, t->device()->lang(), Lang, { - t->device()->Exec( - [low, high, t](Context *ctx) { - Uniform<DType, Lang>(t->Size(), low, high, t->blob(), ctx); - }, - {}, {t->blob()}, true); - }); -} - -void Gaussian(float mean, float std, Tensor *t) { - TYPE_LANG_SWITCH(t->data_type(), DType, t->device()->lang(), Lang, { - t->device()->Exec( - [mean, std, t](Context *ctx) { - Gaussian<DType, Lang>(t->Size(), mean, std, t->blob(), ctx); - }, - {}, {t->blob()}, true); - }); -} - -// ======follow the consistency guide void AddColumn(const Tensor &v, Tensor *M) { AddColumn(1, 1, v, M); } /// Add column 'v' onto each column of matrix M; -void AddColumn(const float alpha, const float beta, const Tensor &v, +template <typename SType> +void AddColumn(const SType alpha, const SType beta, const Tensor &v, Tensor *M) { if (M->transpose()) { Tensor X = M->T(); @@ -570,15 +515,19 @@ void AddColumn(const float alpha, const float beta, const Tensor &v, CHECK_EQ(nb_row, v.Size()); 
Tensor one(Shape{1, nb_col}, M->device(), M->data_type()); - one.SetValue(1.0f); // TODO(wangwei) cast type + one.SetValue(1.0f); // TODO(wangwei) cast type Tensor vmat = Reshape(v, Shape{nb_row, 1}); Mult(alpha, vmat, one, beta, M); } } +template <> +void AddColumn(const float alpha, const float beta, const Tensor &v, Tensor *M); + void AddRow(const Tensor &v, Tensor *M) { AddRow(1, 1, v, M); } /// Sub column 'v' by each column of matrix M; write results into 'out' -void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) { +template <typename SType> +void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M) { if (M->transpose()) { Tensor X = M->T(); AddColumn(v, &X); @@ -594,29 +543,8 @@ void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) { Mult(alpha, one, vmat, beta, M); } } - -template <typename SType> Tensor Div(const SType alpha, const Tensor &in) { - Tensor out(in.shape(), in.device(), in.data_type()); - Div(alpha, in, &out); - return out; -} - -template Tensor Div<float>(const float, const Tensor &); - -template <typename SType> -void Div(const SType alpha, const Tensor &in, Tensor *out) { - CheckDataTypeAndLang(in, *out); - CHECK(in.shape() == out->shape()); - TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { - // TODO(wangwei) type cast SType to DType; - in.device()->Exec( - [alpha, in, out](Context *ctx) { - Div<DType, Lang>(in.Size(), alpha, in.blob(), out->blob(), ctx); - }, - {in.blob()}, {out->blob()}); - }); -} -template void Div<float>(const float, const Tensor &, Tensor *); +template <> +void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M); /// Divide column 'v' by each column of matrix M; write results into 'out' void DivColumn(const Tensor &v, Tensor *M) { @@ -640,12 +568,10 @@ void MultColumn(const Tensor &v, Tensor *M) { CHECK_EQ(v.Size(), M->shape(0)); CheckDataTypeAndLang(*M, v); TYPE_LANG_SWITCH(v.data_type(), DType, 
v.device()->lang(), Lang, { - v.device()->Exec( - [M, v](Context *ctx) { - DGMM<DType, Lang>(false, M->shape(0), M->shape(1), M->blob(), - v.blob(), M->blob(), ctx); - }, - {M->blob(), v.blob()}, {M->blob()}); + v.device()->Exec([M, v](Context *ctx) { + DGMM<DType, Lang>(false, M->shape(0), M->shape(1), M->blob(), v.blob(), + M->blob(), ctx); + }, {M->blob(), v.blob()}, {M->blob()}); }); } @@ -657,12 +583,10 @@ void MultRow(const Tensor &v, Tensor *M) { CHECK_EQ(v.Size(), M->shape(1)); CheckDataTypeAndLang(*M, v); TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, { - v.device()->Exec( - [M, v](Context *ctx) { - DGMM<DType, Lang>(true, M->shape(0), M->shape(1), M->blob(), v.blob(), - M->blob(), ctx); - }, - {M->blob(), v.blob()}, {M->blob()}); + v.device()->Exec([M, v](Context *ctx) { + DGMM<DType, Lang>(true, M->shape(0), M->shape(1), M->blob(), v.blob(), + M->blob(), ctx); + }, {M->blob(), v.blob()}, {M->blob()}); }); } @@ -680,8 +604,8 @@ void SumColumns(const Tensor &M, Tensor *v) { size_t nb_row = M.shape().at(0), nb_col = M.shape().at(1); CHECK_EQ(nb_row, v->Size()); - Tensor one(Shape{nb_col, 1}, M.device(), M.data_type()); - one.SetValue(1.0f); // TODO(wangwei) cast type + Tensor one(Shape{nb_col}, M.device(), M.data_type()); + one.SetValue(1.0f); // TODO(wangwei) cast type Mult(M, one, v); } } @@ -695,10 +619,98 @@ void SumRows(const Tensor &M, Tensor *v) { size_t nb_row = M.shape(0), nb_col = M.shape(1); CHECK_EQ(nb_col, v->Size()); - Tensor one(Shape{nb_row, 1}, M.device(), M.data_type()); - one.SetValue(1.0f); // TODO(wangwei) cast type + Tensor one(Shape{nb_row}, M.device(), M.data_type()); + one.SetValue(1.0f); // TODO(wangwei) cast type Tensor X = M.T(); Mult(X, one, v); } } +// ====================Random operations===================================== +template <typename SType> +void Bernoulli(const SType p, Tensor *out) { + TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, { + auto prob = TypeCast<SType, 
DType>(p); + out->device()->Exec([prob, out](Context *ctx) { + Bernoulli<DType, Lang>(out->Size(), prob, out->blob(), ctx); + }, {}, {out->blob()}, true); + }); +} +template void Bernoulli<float>(const float p, Tensor *out); + +template <typename SType> +void Uniform(const SType low, const SType high, Tensor *out) { + TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, { + auto l = TypeCast<SType, DType>(low); + auto h = TypeCast<SType, DType>(high); + out->device()->Exec([l, h, out](Context *ctx) { + Uniform<DType, Lang>(out->Size(), l, h, out->blob(), ctx); + }, {}, {out->blob()}, true); + }); +} +template void Uniform<float>(const float low, const float high, Tensor *out); + +template <typename SType> +void Gaussian(const SType mean, const SType std, Tensor *out) { + TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, { + auto m = TypeCast<SType, DType>(mean); + auto s = TypeCast<SType, DType>(std); + out->device()->Exec([m, s, out](Context *ctx) { + Gaussian<DType, Lang>(out->Size(), m, s, out->blob(), ctx); + }, {}, {out->blob()}, true); + }); +} +template void Gaussian<float>(const float mean, const float std, Tensor *out); + +// ================Blas operations============================================ +template <typename SType> +void Axpy(const SType alpha, const Tensor &in, Tensor *out) { + TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, { + auto a = TypeCast<SType, DType>(alpha); + out->device()->Exec([a, in, out](Context *ctx) { + Axpy<DType, Lang>(in.Size(), a, in.blob(), out->blob(), ctx); + }, {in.blob(), out->blob()}, {out->blob()}); + }); +} +template <> +void Axpy(const float alpha, const Tensor &in, Tensor *out); + +Tensor Mult(const Tensor &A, const Tensor &B) { + Shape s; + s.push_back(A.shape(0)); + if (B.nDim() == 2) s.push_back(B.shape(1)); + Tensor out(s, A.device(), A.data_type()); + Mult(A, B, &out); + return out; +} + +void Mult(const Tensor &A, const Tensor &B, Tensor *out) { + 
Mult(1.0f, A, B, 0.0f, out); +} + +template <typename SType> +void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta, + Tensor *C) { + CHECK_EQ(A.shape().size(), 2u); + if (B.nDim() == 1u) { + TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, { + auto a = TypeCast<SType, DType>(alpha); + auto b = TypeCast<SType, DType>(beta); + C->device()->Exec([a, A, b, B, C](Context *ctx) { + GEMV<DType, Lang>(A.transpose(), A.shape(0), A.shape(1), a, A.blob(), + B.blob(), b, C->blob(), ctx); + }, {A.blob(), B.blob()}, {C->blob()}); + }); + } else { + CHECK(!C->transpose()); + TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, { + auto a = TypeCast<SType, DType>(alpha); + auto b = TypeCast<SType, DType>(beta); + C->device()->Exec([a, A, b, B, C](Context *ctx) { + GEMM<DType, Lang>(A.transpose(), B.transpose(), A.shape(0), B.shape(1), + A.shape(1), a, A.blob(), B.blob(), b, C->blob(), ctx); + }, {A.blob(), B.blob()}, {C->blob()}); + }); + } +} + } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/src/core/tensor/tensor_math.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h index 1bf6fc7..b5d0ba9 100644 --- a/src/core/tensor/tensor_math.h +++ b/src/core/tensor/tensor_math.h @@ -29,12 +29,14 @@ namespace singa { /// device programming language, e.g., Langice::kCpp, Langice::kCuda /// /// TODO(wangwei) Clean the functions to make the function APIs consistent: -/// 1. All function names should be like XxxYyy or XY, i.e., capitablize the first +/// 1. All function names should be like XxxYyy or XY, i.e., capitablize the +/// first /// letter. /// 2. Order functions based on function name in alphabetical order. -/// 3. Function arguments order is [const basic type] [const Blob] [mutable Blob]. +/// 3. Function arguments order is [const basic type] [const Blob] [mutable +/// Blob]. /// 4. 
Function argument names, use 'num' for total number of elements in -/// elementwise operations; use 'in1' 'in2' for input blobs; use 'out' for +/// elementwise operations; use 'in1' 'in2' for in blobs; use 'out' for /// output blob or value. With exceptions for some functions, e.g., /// Scale(const float alpha, const Blob* in, Blob* out); /// For such cases, use x, v, alpha, etc for scalar types. @@ -46,262 +48,283 @@ namespace singa { /// 7. Use size_t for the number of elements, rows or columns. /// 8. Use the same name for the Tensor and Blob level math functions. - -// ================Linear algebra functions==================================== -/// ret[i] = |input[i]| +// =============Element-wise operations==================================== +/// out[i] = |in[i]| template <typename DType, typename Lang> void Abs(const size_t num, const Blob *in, Blob *out, Context *ctx) { LOG(FATAL) << "Abs Not Implemented"; } +/// out = in + x template <typename DType, typename Lang> -void Set(const size_t num, const DType x, Blob *out, Context *ctx) { - LOG(FATAL) << "Set Not Implemented"; +void Add(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << "Add Not Implemented"; } -/// sum all elements of input into ret +/// out = in1 + in2 template <typename DType, typename Lang> -void Sum(const size_t num, const Blob *in, DType *out, Context *ctx) { - LOG(FATAL) << "Sum Not Implemented"; +void Add(const size_t num, const Blob *in1, const Blob *in2, Blob *out, + Context *ctx) { + LOG(FATAL) << "Add-Pair Not Implemented"; } - -/// ret[i] = sign(input[i]) +/// Element-wise operation, clamp every element into [low, high] +/// if x>high, then x=high; if x<low, then x=low. 
template <typename DType, typename Lang> -void Sign(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Sign Not Implemented"; +void Clamp(const size_t num, const DType low, const DType high, const Blob *in, + Blob *out, Context *ctx) { + LOG(FATAL) << "Clamp Not Implemented"; } -/// Base is e, Neper number. ret[i]=exp(input[i]) +/// out = x / in template <typename DType, typename Lang> -void Exp(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Exp Not Implemented"; +void Div(const size_t num, const DType x, const Blob *in, Blob *out, + Context *ctx) { + LOG(FATAL) << "Div Not Implemented"; } -/// Natual logarithm, the base is e, Neper number ret[i]=log(input[i]). -template <typename DType, typename Lang> -void Log(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Log Not Implemented"; -} -/// Element-wise operation, ret[i]=sqrt([input[i]) template <typename DType, typename Lang> -void Sqrt(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Sqrt Not Implemented"; +void Div(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + CHECK_NE(x, 0.f); + EltwiseMult<DType, Lang>(num, in, DType(1) / x, out, ctx); } -/// Element-wise operation, ret[i]=square([input[i]) +/// out = in1 / in2 template <typename DType, typename Lang> -void Square(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Square Not Implemented"; +void Div(const size_t num, const Blob *in1, const Blob *in2, Blob *out, + Context *ctx) { + LOG(FATAL) << "Div-Pair Not Implemented"; } -/// Element-wise operation, ret[i]=tanh([input[i]) +/// out = in * x template <typename DType, typename Lang> -void Tanh(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Tanh Not Implemented"; +void EltwiseMult(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << "EltwiseMult Not Implemented"; } 
-/// Element-wise operation, ret[i]=max(0, input[i]) + +/// out = in2 * in2 template <typename DType, typename Lang> -void ReLU(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "ReLU Not Implemented"; +void EltwiseMult(const size_t num, const Blob *in1, const Blob *in2, Blob *out, + Context *ctx) { + LOG(FATAL) << "EltwiseMult-Pair Not Implemented"; } -/// Element-wise operation, ret[i]=sigmoid([input[i]) + +/// Base is e, Neper number. out[i]=exp(in[i]) template <typename DType, typename Lang> -void Sigmoid(const size_t num, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Sigmoid Not Implemented"; +void Exp(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Exp Not Implemented"; } -// Do softmax for each row invidually +/// out[i]=(in[i]<=x)?1.f:0.f template <typename DType, typename Lang> -void Softmax(const size_t nrow, const size_t ncol, const Blob *in, - Blob *out, Context *ctx) { - LOG(FATAL) << "Softmax Not Implemented"; +void LE(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << "LE Not Implemented"; } - -// TODO(wangwei) unify SumRow and SumCol. -/// Sum the rows of the input matrix into a vector +/// Natual logarithm, the base is e, Neper number out[i]=log(in[i]). 
template <typename DType, typename Lang> -void SumRows(const size_t nrow, const size_t ncol, const Blob *in, - Blob *out, Context *ctx) { - LOG(FATAL) << "SumRows Not Implemented"; +void Log(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Log Not Implemented"; } - -/// Sum the columns of the input matrix into a vector +/// out[i]=(in[i]<x)?1.f:0.f template <typename DType, typename Lang> -void SumColumns(const size_t nrow, const size_t ncol, const Blob *in, - Blob *out, Context *ctx) { - LOG(FATAL) << "SumColumns Not Implemented"; +void LT(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << "LT Not Implemented"; } - -// TODO(wangwei) unify AddRow and AddCol. -/// Add the vector v to every row of A as the row of out +/// out[i]=(in[i]>=x)?1.f:0.f template <typename DType, typename Lang> -void AddRow(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, - Blob *out, Context *ctx) { - LOG(FATAL) << "AddRow Not Implemented"; +void GE(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << "GE Not Implemented"; } - -/// Add the vector v to every column of A as the column of out +/// out[i]=(in[i]>x)?1.f:0.f template <typename DType, typename Lang> -void AddCol(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, - Blob *out, Context *ctx) { - LOG(FATAL) << "AddCol Not Implemented"; +void GT(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << "GT Not Implemented"; } - -/// Element-wise operation, do v^x for every v from the input tensor +/// Element-wise operation, do v^x for every v from the in tensor template <typename DType, typename Lang> -void Pow(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { +void Pow(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { LOG(FATAL) << "Pow Not Implemented"; } /// Element-wise operation, do v^x for 
every v from the lhs and every x from rhs template <typename DType, typename Lang> -void Pow(const size_t num, const Blob *in1, const Blob *in2, - Blob *out, Context *ctx) { +void Pow(const size_t num, const Blob *in1, const Blob *in2, Blob *out, + Context *ctx) { LOG(FATAL) << "Pow-Pair Not Implemented"; } -/// Element-wise operation, clamp every element into [low, high] -/// if x>high, then x=high; if x<low, then x=low. +/// Element-wise operation, out[i]=max(0, in[i]) template <typename DType, typename Lang> -void Clamp(const size_t num, const DType low, const DType high, const Blob *in, Blob *out, Context *ctx) { - LOG(FATAL) << "Clamp Not Implemented"; +void ReLU(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "ReLU Not Implemented"; } -/// ret = input + x template <typename DType, typename Lang> -void Add(const size_t num, const Blob *in, const DType x, - Blob *out, Context *ctx) { - LOG(FATAL) << "Add Not Implemented"; +void Set(const size_t num, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "Set Not Implemented"; } - -/// ret = lhs + rhs +/// Element-wise operation, out[i]=sigmoid([in[i]) template <typename DType, typename Lang> -void Add(const size_t num, const Blob *in1, const Blob *in2, - Blob *out, Context *ctx) { - LOG(FATAL) << "Add-Pair Not Implemented"; +void Sigmoid(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Sigmoid Not Implemented"; } -/// ret = input - x +/// out[i] = sign(in[i]) template <typename DType, typename Lang> -void Sub(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { - Add<DType, Lang>(num, in, -x, out, ctx); +void Sign(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Sign Not Implemented"; } - -/// ret = lhs - rhs +/// Element-wise operation, out[i]=sqrt([in[i]) template <typename DType, typename Lang> -void Sub(const size_t num, const Blob *in1, const Blob *in2, - Blob *out, Context *ctx) { - LOG(FATAL) 
<< "Sub-Pair Not Implemented"; +void Sqrt(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Sqrt Not Implemented"; } -/// ret = input * x +/// Element-wise operation, out[i]=square([in[i]) template <typename DType, typename Lang> -void EltwiseMult(const size_t num, const Blob *in, const DType x, Blob *out, - Context *ctx) { - LOG(FATAL) << "EltwiseMult Not Implemented"; +void Square(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Square Not Implemented"; } -/// ret = lhs * rhs +/// out = in - x template <typename DType, typename Lang> -void EltwiseMult(const size_t num, const Blob *in1, const Blob *in2, - Blob *out, Context *ctx) { - LOG(FATAL) << "EltwiseMult-Pair Not Implemented"; +void Sub(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + Add<DType, Lang>(num, in, -x, out, ctx); } -/// ret = input / x +/// out = in1 - in2 template <typename DType, typename Lang> -void Div(const size_t num, const DType x, const Blob *in, - Blob *out, Context *ctx) { - LOG(FATAL) << "Div Not Implemented"; +void Sub(const size_t num, const Blob *in1, const Blob *in2, Blob *out, + Context *ctx) { + LOG(FATAL) << "Sub-Pair Not Implemented"; } - +/// sum all elements of in into out template <typename DType, typename Lang> -void Div(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { - CHECK_NE(x,0.f); - EltwiseMult<DType, Lang>(num, in, DType(1) / x, out, ctx); +void Sum(const size_t num, const Blob *in, DType *out, Context *ctx) { + LOG(FATAL) << "Sum Not Implemented"; } -/// ret = lhs / rhs +/// Element-wise operation, out[i]=tanh([in[i]) template <typename DType, typename Lang> -void Div(const size_t num, const Blob *in1, const Blob *in2, - Blob *out, Context *ctx) { - LOG(FATAL) << "Div-Pair Not Implemented"; +void Tanh(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Tanh Not Implemented"; } +// =========== Matrix operations 
=========================================== +/// Add the vector v to every column of A as the column of out +template <typename DType, typename Lang> +void AddCol(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, + Blob *out, Context *ctx) { + LOG(FATAL) << "AddCol Not Implemented"; +} +// TODO(wangwei) unify AddRow and AddCol. +/// Add the vector v to every row of A as the row of out +template <typename DType, typename Lang> +void AddRow(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, + Blob *out, Context *ctx) { + LOG(FATAL) << "AddRow Not Implemented"; +} /// outer-product. -/// lhs and rhs are vectors of len m and n. ret is matrix of shape m * n +/// in1 and in2 are vectors of len m and n. out is matrix of shape m * n template <typename DType, typename Lang> -void Outer(const size_t m, const size_t n, const Blob *in1, const Blob *in2, - Blob *out, Context *ctx) { +void Outer(const size_t m, const size_t n, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { LOG(FATAL) << "Outer Not Implemented"; } - -/// ret[i]=(input[i]<x)?1.f:0.f +// Do softmax for each row individually template <typename DType, typename Lang> -void LT(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { - LOG(FATAL) << "LT Not Implemented"; +void Softmax(const size_t nrow, const size_t ncol, const Blob *in, Blob *out, + Context *ctx) { + LOG(FATAL) << "Softmax Not Implemented"; } -/// ret[i]=(input[i]<=x)?1.f:0.f +/// Sum the columns of the in matrix into a vector template <typename DType, typename Lang> -void LE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { - LOG(FATAL) << "LE Not Implemented"; +void SumColumns(const size_t nrow, const size_t ncol, const Blob *in, Blob *out, + Context *ctx) { + LOG(FATAL) << "SumColumns Not Implemented"; } -/// ret[i]=(input[i]>x)?1.f:0.f +// TODO(wangwei) unify SumRow and SumCol. 
+/// Sum the rows of the in matrix into a vector template <typename DType, typename Lang> -void GT(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { - LOG(FATAL) << "GT Not Implemented"; +void SumRows(const size_t nrow, const size_t ncol, const Blob *in, Blob *out, + Context *ctx) { + LOG(FATAL) << "SumRows Not Implemented"; +} + +// ================Random functions=========================================== +/// Each element of out would be 1 with prob p and 0 with 1-p. 0<= p <= 1 +// Get the random generator from 'ctx' +// If DType is not float, then convert the threshold to DType +template <typename DType, typename Lang> +void Bernoulli(const size_t num, const float p, Blob *out, Context *ctx) { + LOG(FATAL) << "Bernoulli Not Implemented"; } -/// ret[i]=(input[i]>=x)?1.f:0.f +// The random generator should be extracted from ctx. +// If DType is not float, then convert the mean and std to DType template <typename DType, typename Lang> -void GE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { - LOG(FATAL) << "GE Not Implemented"; +void Gaussian(const size_t num, const float mean, const float std, Blob *out, + Context *ctx) { + LOG(FATAL) << "Gaussian Not Implemented"; +} +// The random generator should be extracted from ctx. +// If DType is not float, then convert the low and high to DType +template <typename DType, typename Lang> +void Uniform(const size_t num, const float low, const float high, Blob *out, + Context *ctx) { + LOG(FATAL) << "Uniform Not Implemented"; } // ===== BLAS functions, ref to http://docs.nvidia.com/cuda/cublas -// ===== Level 1 -/// return the index of the element with the max value. +/// out = the index of the element with the max value. template <typename DType, typename Lang> void Amax(const size_t num, const Blob *in, size_t *out, Context *ctx) { LOG(FATAL) << "Amax Not Implemented"; } -/// return the index of the element with the min value. 
+/// out = the index of the element with the min value. template <typename DType, typename Lang> void Amin(const size_t num, const Blob *in, size_t *out, Context *ctx) { LOG(FATAL) << "Amin Not Implemented"; } -/// ret = sum |x| for all x in input +/// out = sum |x| for all x in in template <typename DType, typename Lang> void Asum(const size_t num, const Blob *in, DType *out, Context *ctx) { LOG(FATAL) << "Asum Not Implemented"; } -/// ret = alpha * input + ret +/// out = alpha * in + out template <typename DType, typename Lang> -void Axpy(const size_t num, const DType alpha, const Blob *in, - Blob *out, Context *ctx) { +void Axpy(const size_t num, const DType alpha, const Blob *in, Blob *out, + Context *ctx) { LOG(FATAL) << "Axpy Not Implemented"; } -/// ret *= x +/// out *= x template <typename DType, typename Lang> void Scale(const size_t num, const DType x, Blob *out, Context *ctx) { LOG(FATAL) << "Scale Not Implemented"; } template <typename DType, typename Lang> -void Dot(const size_t num, const Blob *in1, const Blob *in2, - DType *out, Context *ctx) { +void Dot(const size_t num, const Blob *in1, const Blob *in2, DType *out, + Context *ctx) { LOG(FATAL) << "Dot Not Implemented"; } -// ===== Level 2 -/// ret = alpha * op(A) * v + beta * ret. -/// op(A) = A if trans = false; A^T otherwise; rows(op(A)) = m, cols(op(A)) = n. +/// out = alpha * A * v + beta * out. 
+/// trans indicates if the internal data layout of A is transposed template <typename DType, typename Lang> -void GEMV(bool trans, const size_t m, const size_t n, const DType alpha, - const Blob *A, const Blob *v, - const DType beta, Blob *out, Context *ctx) { +void GEMV(bool trans, const size_t m, const size_t n, const DType alpha, + const Blob *A, const Blob *v, const DType beta, Blob *out, + Context *ctx) { LOG(FATAL) << "GEMV Not Implemented"; } @@ -323,34 +346,5 @@ void GEMM(const bool transA, const bool transB, const size_t nrowA, LOG(FATAL) << "GEMM Not Implemented"; } - -// ===== Level 3 - -// ================Random functions=========================================== -/// Each element of ret would be 1 with prob p and 0 with 1-p. 0<= p <= 1 -// Get the random generator from 'ctx' -// If DType is not float, then convert the threshold to DType -template <typename DType, typename Lang> -void Bernoulli(const size_t num, const float p, Blob *out, Context *ctx) { - LOG(FATAL) << "Bernoulli Not Implemented"; -} -// The random generator should be extracted from ctx. -// If DType is not float, then convert the low and high to DType -template <typename DType, typename Lang> -void Uniform(const size_t num, const float low, const float high, - Blob *out, Context *ctx) { - LOG(FATAL) << "Uniform Not Implemented"; -} -// The random generator should be extracted from ctx. -// If DType is not float, then convert the mean and std to DType -template <typename DType, typename Lang> -void Gaussian(const size_t num, const float mean, const float std, - Blob *out, Context *ctx) { - LOG(FATAL) << "Gaussian Not Implemented"; -} - - - - } // namespace singa #endif // SINGA_CORE_MATH_H_
