Repository: incubator-singa
Updated Branches:
  refs/heads/master e248e447b -> 7a19e63db
SINGA-362 Add functions to support einsum function


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/16c61112
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/16c61112
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/16c61112

Branch: refs/heads/master
Commit: 16c61112948099fe4279ca11651f88456cbc690d
Parents: c67c3b6
Author: sheyujian <[email protected]>
Authored: Mon May 21 15:44:15 2018 +0800
Committer: sheyujian <[email protected]>
Committed: Mon May 21 21:19:56 2018 +0800

----------------------------------------------------------------------
 include/singa/core/device.h |   5 ++
 include/singa/core/tensor.h |   8 ++
 python/singa/tensor.py      | 188 ++++++++++++++++++++++++++++++++++++++-
 src/core/device/device.cc   |  23 +++++
 src/core/tensor/tensor.cc   |  89 ++++++++++++++++++
 5 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 1a960d8..24569f4 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -75,6 +75,11 @@ class Device {
   virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
                               CopyDirection direction, int dst_offset,
                               int src_offset);

+  virtual void RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
+                                CopyDirection direct, bool broadcast_flag,
+                                int axis_shape, int shape_outer, int chunk,
+                                vector<int> repeats, int dst_offset, int src_offset);
+
   void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                            size_t dst_offset = 0);
   /// Submit the operation to the device, which may execute it right now or

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 3cc28ff..c7958ff 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -165,6 +165,8 @@ class Tensor {
   /// Meta data would not be copied!
   void CopyData(const Tensor &other);

+  void RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &other);
+
   /// Deserialize data, shape and transpose from protobuf object.
   void FromProto(const singa::TensorProto &proto);

@@ -175,6 +177,8 @@ class Tensor {
   /// device. If 'device' is nullptr, then clone it one the current device.
   Tensor Clone(std::shared_ptr<Device> device = nullptr) const;

+  Tensor Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device = nullptr);
+
   // Tensor operations

   /// Matrix transpose. Valid only if shape.size() == 2.
@@ -287,6 +291,10 @@ Tensor Reshape(const Tensor &in, Shape &&s);
 void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
                     const size_t dst_offset = 0, const size_t src_offset = 0);

+void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis,
+                      Tensor *dst, const Tensor &in, const size_t num,
+                      const size_t dst_offset = 0, const size_t src_offset = 0);
+
 // =============Element-wise operations====================================
 Tensor Abs(const Tensor &in);
 Tensor Exp(const Tensor &in);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 8f36775..ff7206c 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -71,6 +71,8 @@ float32 = core_pb2.kFloat32

 CTensor = singa.Tensor

+
+
 class Tensor(object):
     '''Python Tensor, which wraps a swig converted Tensor from CPP Tensor.

@@ -1073,9 +1075,193 @@ def einsum(ops, *args):
     transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
     res_ = res_.transpose(transpose_res)
     res = from_numpy(res_)
-
     return res

+
+def sum2(t, axis=None, out=None):
+    '''Sum of tensor elements over a given axis.
+
+    Args:
+        t: Singa.tensor
+            The array_like tensor to be summed.
+        axis: None or int or tuple of ints, optional
+            Axis or axes along which a sum is performed.
+            The default, axis=None, will sum all of the elements of the input array.
+            If axis is negative it counts from the last to the first axis.
+            If axis is a tuple of ints, a sum is performed on all of the axes specified
+            in the tuple instead of a single axis or all the axes as before.
+        out: Singa.tensor, optional
+            Alternative output array in which to place the result.
+            It must have the same shape as the expected output,
+            but the type of the output values will be cast if necessary.
+
+    Return: sum_along_axis: tensor
+        A tensor with the same shape as t, with the specified axis removed.
+        If t is a 0-d array, or if axis is None, a scalar is returned.
+        If an output array is specified, a reference to out is returned.
+    '''
+
+    t_shape = t.shape
+    t_ndim = t.ndim()
+
+    if axis is None:
+        one = Tensor(t.shape, t.device, t.dtype)
+        one.set_value(1.0)
+        ret = tensordot(t, one, t_ndim)
+
+    if isinstance(axis,int):
+        if axis < 0:
+            axis += 2
+
+        axis_shape = t_shape[axis]
+        one = Tensor(axis_shape, t.device, t.dtype)
+        one.set_value(1.0)
+        ret = tensordot(t, one, axes=([axis],[0]))
+
+    if isinstance(axis,tuple):
+        l_axis = list(axis)
+        axis_shape = [t_shape[x] for x in axis]
+        one = Tensor(axis_shape, t.device, t.dtype)
+        one.set_value(1.0)
+        one_axis = [x for x in range(one.ndim())]
+        ret = tensordot(t, one, (l_axis,one_axis))
+
+    if out is not None:
+        if out.shape != ret.shape:
+            raise ValueError('dimensions do not match')
+        out[:] = ret
+        return out
+    else:
+        return ret
+
+def repeat(t, repeats, axis = None):
+    if isinstance(repeats, int):
+        if repeats < 0:
+            raise ValueError("'repeats' should not be negative: {}".format(repeats))
+        # broadcast = True
+        if axis == None:
+            axis = 9999
+        if axis < 0:
+            axis += 2
+        ret = singa.Repeat(t, list(repeats), axis)
+    elif isinstance(repeats, tuple) or isinstance(repeats, list):
+        for rep in repeats:
+            if rep < 0:
+                raise ValueError("'repeats' should be int or sequence: {}".format(repeats))
+        if axis == None:
+            axis = 9999
+        if axis < 0:
+            axis += 2
+        ret = singa.Repeat(t, list(repeats), axis)
+        t_shape = t.shape
+        t_shape[axis] = sum(repeats)
+        ret = ret.reshape(t_shape)
+    else:
+        raise ValueError('repeats should be int or sequence')
+    return ret
+
+def tensordot (A,B,axes=2):
+
+    """Returns the tensor multiplication of two tensors along specified axes.
+
+    This is equivalent to computing the dot product along the specified axes,
+    which are treated as one axis by reshaping.
+
+    Args:
+        A: Singa.Tensor
+        B: Singa.Tensor
+        axes:
+            - If it is an integer, then the last ''axes'' axes of ''A'' and
+              the first ''axes'' axes of ''B'' are used.
+            - If it is a pair of sequences of integers, then these two
+              sequences specify the list of axes for ''A'' and ''B''. The
+              corresponding axes are paired for sum-product.
+
+    Return:
+        singa.tensor: The tensor product of ''A'' and ''B'' along the
+        axes specified by ''axes''.
+
+    Thanks to numpy.tensordot; see
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/core/numeric.py#L1123-L1306
+    """
+    # when axes is an integer, axes_A and axes_B are the axes at the last of ''A'' and
+    # the first of ''B''. For example, when axes is 1, we do the normal multiplication:
+    # if A is in shape(3,2,4), B is in shape(4,2,5), it will return a matrix in shape(3,2,2,5);
+    # when axes is 2 and A, B are in shape (3,2,4) and (2,4,5), it will return a matrix in shape(3,5).
+
+    if type(axes) == int:
+        axes_A = list(range(-axes, 0))
+        axes_B = list(range(0, axes))
+        axes_B = axes_B
+    else:
+        axes_A,axes_B =axes
+        # when axes is a pair of sequences of integers. For example, A is in shape(3,2,4),
+        # B is in shape(4,2,5), we set axes as ([1,2],[1,0]), it will return a matrix in shape(3,5)
+    if isinstance(axes_A,list):
+        na = len(axes_A)
+        axes_A = list(axes_A)
+    else:
+        axes_A = [axes_A]
+        na = 1
+    if isinstance(axes_B,list):
+        nb = len(axes_B)
+        axes_B = list(axes_B)
+    else:
+        axes_B = [axes_B]
+        nb = 1
+
+    # a_shape and b_shape are the shapes of tensors A and B, while nda and ndb are the dims of A and B
+    a_shape = A.shape
+    nda = A.ndim()
+    b_shape = B.shape
+    ndb = B.ndim()
+    equal = True
+    # check whether the length of axes_A is equal to that of axes_B
+    if na != nb:
+        equal = False
+    else:
+        # make sure the contracted shapes match
+        for k in range(na):
+            if a_shape[axes_A[k]] != b_shape[axes_B[k]]:
+                equal = False
+                break
+            if axes_A[k] < 0:
+                axes_A[k] += nda
+            if axes_B[k] < 0:
+                axes_B[k] += ndb
+    if not equal:
+        raise ValueError("shape-mismatch for sum")
+    '''start to do the calculation according to the axes'''
+
+    notin = [k for k in range(nda) if k not in axes_A]
+    # nda is the dim of A, axes_A are the contracted axes of A, and notin are the axes not in axes_A
+    newaxes_a = notin + axes_A
+    N2 = 1
+    for axis in axes_A:
+        N2 *= a_shape[axis]
+    N1 = 1
+    for ax in notin:
+        N1 *= a_shape[ax]
+    # newshape_a is the 2D shape used for the multiplication. For example, if A is in shape(3,2,4),
+    # B is in shape(4,2,5) and we set axes as ([1,2],[1,0]), then newshape_a is (3, 8);
+    # olda is the part of the shape that will appear in the result.
+    newshape_a = (N1,N2)
+    olda = [a_shape[axis] for axis in notin]
+    notin = [k for k in range(ndb) if k not in axes_B]
+    newaxes_b = axes_B + notin
+    N2 = 1
+    for axis in axes_B:
+        N2 *= b_shape[axis]
+    N1 = 1
+    for bx in notin:
+        N1 *= b_shape[bx]
+    newshape_b = (N2, N1)
+    oldb = [b_shape[axis] for axis in notin]
+    # do transpose and reshape to get the 2D matrices for the multiplication
+    at = A.transpose(newaxes_a).reshape(newshape_a)
+    bt = B.transpose(newaxes_b).reshape(newshape_b)
+    res = mult(at, bt)
+    # reshape the result
+    return res.reshape(olda + oldb)


 def div(lhs, rhs, ret=None):
     '''Elementi-wise division.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index cda1b9f..d569015 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -64,6 +64,29 @@ void Device::CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
              {src}, {dst});
 }

+void Device::RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
+                              CopyDirection direct, bool broadcast_flag,
+                              int axis_shape, int shape_outer, int chunk,
+                              vector<int> repeats, int dst_offset, int src_offset) {
+  const char *src_data = reinterpret_cast<const char*>(src->data()) + dst_offset;
+  char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + src_offset;
+
+  for (int i = 0; i < shape_outer; i++) {
+    for (int j = 0; j < axis_shape; j++) {
+      int temp = broadcast_flag ? repeats[0] : repeats[j];
+      for (int k = 0; k < temp; k++) {
+        this->Exec(
+            [this, dst_data, src_data, direct, chunk, repeats](Context* ctx) {
+              this->CopyToFrom(dst_data, src_data, chunk, direct, ctx);
+            },
+            {src}, {dst});
+        dst_data += chunk;
+      }
+      src_data += chunk;
+    }
+  }
+}
+
 void Device::CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                                  size_t dst_offset) {
   auto direct = lang_ == kCpp ? kHostToHost : kHostToDevice;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index de0d7d2..22541df 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -22,6 +22,8 @@
 #include "./tensor_math_opencl.h"
 #include <utility>

+#define Noaxis 9999
+
 namespace singa {

 Tensor::~Tensor() {
@@ -214,6 +216,20 @@ void Tensor::CopyData(const Tensor &src) {
   }
 }

+void Tensor::RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &src) {
+  if(axis == Noaxis) {
+    CHECK_EQ(Size(), src.Size()*total_repeats);
+  } else {
+    CHECK_EQ(Size(), src.Size()*total_repeats/src.shape()[axis]);
+  }
+
+  CHECK(block_ != nullptr);
+  // Do repeat only if the src's block is already initialized.
+  if (src.block_ != nullptr) {
+    singa::RepeatDataToFrom(false, repeats, axis, this, src, Size(), 0, 0);
+  }
+}
+
 void Tensor::FromProto(const singa::TensorProto &proto) {
   if (block_ != nullptr && block_->DecRefCount() == 0)
     device_->FreeBlock(block_);
@@ -329,6 +345,29 @@ Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
   return t;
 }

+Tensor Tensor::Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device) {
+  if (device == nullptr) device = device_;
+  Tensor t;
+  int total_repeats = 0;
+  if (axis == Noaxis) {
+    total_repeats = repeats[0];
+    t.shape_.push_back(Product(shape_)*total_repeats);
+  } else {
+    for (size_t i = 0; i < shape_[axis]; i++) {
+      if(repeats[i] < 0) {
+        LOG(FATAL) << "the repeats number is less than zero";
+      }
+      total_repeats += repeats[i];
+      t.shape_.push_back(Product(shape_)/shape_[axis]*total_repeats);
+    }
+  }
+  t.device_ = device_;
+  t.data_type_ = data_type_;
+  t.strides_.push_back(1);
+  t.RepeatData(repeats, axis, total_repeats, *this);
+  return t;
+}
+
 //yisen todo
 Tensor Tensor::T() const {
   // this function only works for 2d tensors
@@ -482,6 +521,56 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
     src_dev->CopyDataToFrom(to, from, nBytes, direct, (int)d_offset,
                             (int)s_offset);
   }
 }
+
+void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis,
+                      Tensor *dst, const Tensor &src, const size_t num,
+                      const size_t dst_offset, const size_t src_offset) {
+  if (repeats.size() == 1) {
+    broadcast_flag = true;
+  }
+  if (repeats.size() > 1) {
+    if (axis == Noaxis) {
+      LOG(FATAL) << "When repeats parameter is sequence, axis cannot be None";
+    }
+  }
+  for (size_t i = 0; i < repeats.size(); i++){
+    CHECK_GE(repeats[i], 0);
+  }
+  auto width = SizeOf(src.data_type());
+  CHECK_EQ(width, SizeOf(dst->data_type()));
+  size_t nBytes = num * width;
+  auto d_offset = dst_offset * width;
+  auto s_offset = src_offset * width;
+  int chunk = width;
+  int axis_shape = 1;
+  if (axis == Noaxis){
+    axis_shape = 1;
+  } else {
+    axis_shape = src.shape()[axis];
+    for(size_t i = axis + 1; i < src.nDim(); i++) {
+      chunk *= src.shape()[i];
+    }
+  }
+  int shape_outer = Product(src.shape());
+  std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
+  Block *from = src.block(), *to = dst->block();
+  if (dst_dev->lang() != src_dev->lang()) {
+    // let the none cpp device conduct copy op
+    if (dst_dev->lang() == kCpp) {
+      src_dev->RepeatDataToFrom(to, from, nBytes, kDeviceToHost, broadcast_flag, axis_shape,
+                                shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
+    } else if (src_dev->lang() == kCpp) {
+      dst_dev->RepeatDataToFrom(to, from, nBytes, kHostToDevice, broadcast_flag, axis_shape,
+                                shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
+    } else {
+      LOG(FATAL) << "Not support mem repeat copy betwee Cuda and OpenCL device";
+    }
+  } else {
+    auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
+    src_dev->RepeatDataToFrom(to, from, nBytes, direct, broadcast_flag, axis_shape,
+                              shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
+  }
+}
 //============================================================================
 /// typedef DType accroding to type value.
 /// DType would be used in the code block __VA_ARGS__.
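
For reference, the new Python helpers (sum2, repeat and tensordot in python/singa/tensor.py) are modeled on the corresponding NumPy operations. Below is a minimal NumPy sketch of the expected shapes; it uses plain numpy calls only, not the singa API, and the shapes follow the examples given in the tensordot comments above.

    import numpy as np

    A = np.arange(24, dtype=np.float32).reshape(3, 2, 4)
    B = np.arange(40, dtype=np.float32).reshape(2, 4, 5)

    # axes=2 contracts the last two axes of A with the first two axes of B;
    # the patch's tensordot builds the same result via transpose + reshape +
    # a single 2D multiplication (mult) followed by a reshape.
    print(np.tensordot(A, B, axes=2).shape)   # (3, 5)

    # sum2(t, axis=1) is expressed as tensordot(t, ones, ([1], [0])),
    # so the summed axis is removed from the result.
    print(A.sum(axis=1).shape)                # (3, 4)

    # repeat(t, 2, axis=0) is intended to behave like numpy.repeat on that axis.
    print(np.repeat(A, 2, axis=0).shape)      # (6, 2, 4)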

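The device-level Device::RepeatDataToFrom moves data in contiguous chunks: the chunk size is the product of the dimensions after the repeated axis, and each chunk is written repeats[j] times (or repeats[0] times when a single repeat count is broadcast). The following rough Python sketch illustrates only that chunked-traversal idea, not the C++ code above; repeat_axis is a hypothetical helper introduced here for illustration.

    import numpy as np

    def repeat_axis(src, repeats, axis):
        """Hypothetical helper: chunk-by-chunk repeat along `axis`.

        Each index j on `axis` owns a contiguous run of prod(shape[axis+1:])
        elements; that run is written out repeats[j] times. The device-level
        copy moves data in the same kind of contiguous chunks.
        """
        shape = src.shape
        chunk = int(np.prod(shape[axis + 1:]))   # elements per contiguous run
        outer = int(np.prod(shape[:axis]))       # runs that precede the axis
        flat = src.reshape(outer, shape[axis], chunk)
        out = []
        for i in range(outer):
            for j in range(shape[axis]):
                out.extend([flat[i, j]] * repeats[j])   # emit the chunk repeats[j] times
        new_shape = list(shape)
        new_shape[axis] = sum(repeats)
        return np.asarray(out).reshape(new_shape)

    x = np.arange(6).reshape(2, 3)
    print(repeat_axis(x, [1, 2, 1], axis=1))
    # same result as np.repeat(x, [1, 2, 1], axis=1)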