Repository: incubator-singa
Updated Branches:
  refs/heads/master e248e447b -> 7a19e63db


SINGA-362 Add functions to support einsum function


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/16c61112
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/16c61112
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/16c61112

Branch: refs/heads/master
Commit: 16c61112948099fe4279ca11651f88456cbc690d
Parents: c67c3b6
Author: sheyujian <[email protected]>
Authored: Mon May 21 15:44:15 2018 +0800
Committer: sheyujian <[email protected]>
Committed: Mon May 21 21:19:56 2018 +0800

----------------------------------------------------------------------
 include/singa/core/device.h |   5 ++
 include/singa/core/tensor.h |   8 ++
 python/singa/tensor.py      | 188 ++++++++++++++++++++++++++++++++++++++-
 src/core/device/device.cc   |  23 +++++
 src/core/tensor/tensor.cc   |  89 ++++++++++++++++++
 5 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 1a960d8..24569f4 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -75,6 +75,11 @@ class Device {
   virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
                       CopyDirection direction, int dst_offset, int src_offset);
 
+  virtual void RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
+                                CopyDirection direct, bool broadcast_flag,
+                                int axis_shape, int shape_outer, int chunk,
+                                vector<int> repeats, int dst_offset, int src_offset);
+
   void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                            size_t dst_offset = 0);
   /// Submit the operation to the device, which may execute it right now or

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 3cc28ff..c7958ff 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -165,6 +165,8 @@ class Tensor {
   /// Meta data would not be copied!
   void CopyData(const Tensor &other);
 
+  void RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &other);
+
   /// Deserialize data, shape and transpose from protobuf object.
   void FromProto(const singa::TensorProto &proto);
 
@@ -175,6 +177,8 @@ class Tensor {
   /// device. If 'device' is nullptr, then clone it on the current device.
   Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
 
+  Tensor Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device = nullptr);
+
   // Tensor operations
 
   /// Matrix transpose.  Valid only if shape.size() == 2.
@@ -287,6 +291,10 @@ Tensor Reshape(const Tensor &in, Shape &&s);
 void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
                     const size_t dst_offset = 0, const size_t src_offset = 0);
 
+void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis,
+                      Tensor *dst, const Tensor &in, const size_t num,
+                      const size_t dst_offset = 0, const size_t src_offset = 0);
+
 // =============Element-wise operations====================================
 Tensor Abs(const Tensor &in);
 Tensor Exp(const Tensor &in);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 8f36775..ff7206c 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -71,6 +71,8 @@ float32 = core_pb2.kFloat32
 CTensor = singa.Tensor
 
 
+
+
 class Tensor(object):
     '''Python Tensor, which wraps a swig converted Tensor from CPP Tensor.
 
@@ -1073,9 +1075,193 @@ def einsum(ops, *args):
     transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
     res_ = res_.transpose(transpose_res)
     res = from_numpy(res_)
-
     return res
 
+def sum2(t, axis=None, out=None):
+    '''Sum of tensor elements over the given axis.
+
+    Args:
+        t: Singa.tensor
+            The array_like tensor to be summed.
+        axis: None or int or tuple of ints, optional
+            Axis or axes along which a sum is performed.
+            The default, axis=None, sums all of the elements of the input tensor.
+            If axis is negative, it counts from the last to the first axis.
+            If axis is a tuple of ints, a sum is performed on all of the axes specified
+            in the tuple instead of a single axis or all the axes as before.
+        out: Singa.tensor, optional
+            Alternative output tensor in which to place the result.
+            It must have the same shape as the expected output,
+            but the type of the output values will be cast if necessary.
+
+    Return: sum_along_axis: tensor
+        A tensor with the same shape as t, but with the specified axis removed.
+        If t is a 0-d tensor, or if axis is None, a scalar is returned.
+        If an output tensor is specified, a reference to out is returned.
+    '''
+
+    t_shape = t.shape
+    t_ndim = t.ndim()
+
+    if axis is None:
+        one = Tensor(t.shape, t.device, t.dtype)
+        one.set_value(1.0)
+        ret = tensordot(t, one, t_ndim)
+
+    if isinstance(axis,int):
+        if axis < 0:
+            axis += 2
+
+        axis_shape = t_shape[axis]
+        one = Tensor((axis_shape, ), t.device, t.dtype)
+        one.set_value(1.0)
+        ret = tensordot(t, one, axes=([axis],[0]))
+
+    if isinstance(axis,tuple):
+        l_axis = list(axis)
+        axis_shape = [t_shape[x] for x in axis]
+        one = Tensor(axis_shape, t.device, t.dtype)
+        one.set_value(1.0)
+        one_axis = [x for x in range(one.ndim())]
+        ret = tensordot(t, one, (l_axis,one_axis))
+
+    if out is not None:
+        if out.shape != ret.shape:
+            raise ValueError('dimensions do not match')
+        out[:] = ret
+        return out
+    else:
+        return ret
+
+def repeat(t, repeats, axis=None):
+    if isinstance(repeats, int):
+        if repeats < 0:
+            raise ValueError("'repeats' should not be negative: {}".format(repeats))
+        # broadcast = True
+        if axis is None:
+            axis = 9999
+        if axis < 0:
+            axis += 2
+        ret = singa.Repeat(t, [repeats], axis)
+    elif isinstance(repeats, (tuple, list)):
+        for rep in repeats:
+            if rep < 0:
+                raise ValueError("'repeats' should not contain negative values: {}".format(repeats))
+        if axis is None:
+            axis = 9999
+        if axis < 0:
+            axis += 2
+        ret = singa.Repeat(t, list(repeats), axis)
+        t_shape = list(t.shape)
+        t_shape[axis] = sum(repeats)
+        ret = ret.reshape(t_shape)
+    else:
+        raise ValueError('repeats should be int or sequence')
+    return ret
+
+def tensordot(A, B, axes=2):
+
+    """Returns the tensor multiplication of two tensors along specified axes.
+
+    This is equivalent to computing the dot product along the specified axes,
+    which are treated as one axis by reshaping.
+
+    Args:
+        A: Singa.Tensor
+        B: Singa.Tensor
+        axes:
+            - If it is an integer, then that many axes at the end of ``A``
+              and at the beginning of ``B`` are used.
+            - If it is a pair of sequences of integers, then these two
+              sequences specify the lists of axes for ``A`` and ``B``. The
+              corresponding axes are paired for the sum-product.
+
+    Return:
+        singa.tensor: The tensor product of ``A`` and ``B`` along the
+        axes specified by ``axes``.
+
+    Thanks to numpy.tensordot; see
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/core/numeric.py#L1123-L1306
+    """
+    # When axes is an integer, axes_A and axes_B are the last ``axes`` axes of A
+    # and the first ``axes`` axes of B. For example, when axes is 1 we do a normal
+    # matrix multiplication: if A has shape (3,2,4) and B has shape (4,2,5), the
+    # result has shape (3,2,2,5). When axes is 2 and A, B have shapes (3,2,4) and
+    # (2,4,5), the result has shape (3,5).
+
+    if isinstance(axes, int):
+        axes_A = list(range(-axes, 0))
+        axes_B = list(range(0, axes))
+    else:
+        axes_A, axes_B = axes
+    # When axes is a pair of sequences of integers, e.g. A has shape (3,2,4),
+    # B has shape (4,2,5) and axes is ([1,2],[1,0]), the result has shape (3,5).
+    if isinstance(axes_A,list):
+        na = len(axes_A)
+        axes_A = list(axes_A)
+    else:
+        axes_A = [axes_A]
+        na = 1
+    if isinstance(axes_B,list):
+        nb = len(axes_B)
+        axes_B = list(axes_B)
+    else:
+        axes_B = [axes_B]
+        nb = 1
+
+    # a_shape and b_shape are the shapes of tensors A and B, while nda and ndb
+    # are their numbers of dimensions
+    a_shape = A.shape
+    nda = A.ndim()
+    b_shape = B.shape
+    ndb = B.ndim()
+    equal = True
+    # check that the length of axes_A equals the length of axes_B
+    if na != nb:
+        equal = False
+    else:
+    # to make the shape match
+        for k in range(na):
+            if a_shape[axes_A[k]] != b_shape[axes_B[k]]:
+                equal = False
+                break
+            if axes_A[k] < 0:
+                axes_A[k] += nda
+            if axes_B[k] < 0:
+                axes_B[k] += ndb
+    if not equal:
+        raise ValueError("shape-mismatch for sum")
+    # start the calculation according to the axes
+
+    notin = [k for k in range(nda) if k not in axes_A]
+    # nda is the number of dims of A, axes_A are the contracted axes of A,
+    # and notin holds the axes of A that are not in axes_A
+    newaxes_a = notin + axes_A
+    N2 = 1
+    for axis in axes_A:
+        N2 *= a_shape[axis]
+    N1 = 1
+    for ax in notin:
+        N1 *= a_shape[ax]
+    # newshape_a is the 2D shape used for the multiplication. For example, if A has
+    # shape (3,2,4), B has shape (4,2,5) and axes is ([1,2],[1,0]), then newshape_a
+    # is (3, 8).
+    # olda is the part of A's shape that appears in the result.
+    newshape_a = (N1,N2)
+    olda = [a_shape[axis] for axis in notin]
+    notin = [k for k in range(ndb) if k not in axes_B]
+    newaxes_b = axes_B + notin
+    N2 = 1
+    for axis in axes_B:
+        N2 *= b_shape[axis]
+    N1 = 1
+    for bx in notin:
+        N1 *= b_shape[bx]
+    newshape_b = (N2, N1)
+    oldb = [b_shape[axis] for axis in notin]
+    # do transpose and reshape to get the 2D matrix to do multiplication
+    at = A.transpose(newaxes_a).reshape(newshape_a)
+    bt = B.transpose(newaxes_b).reshape(newshape_b)
+    res = mult(at, bt)
+    #reshape the result
+    return res.reshape(olda + oldb)
 
 def div(lhs, rhs, ret=None):
     '''Element-wise division.

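As a quick cross-check of the comments above: tensordot mirrors numpy.tensordot (credited in the docstring), and sum2 reduces by contracting with a tensor of ones. The snippet below is illustrative only and uses plain numpy; it does not call the new SINGA code.

import numpy as np

A = np.zeros((3, 2, 4))
B = np.zeros((4, 2, 5))
# axes=1 contracts A's last axis with B's first axis
assert np.tensordot(A, B, axes=1).shape == (3, 2, 2, 5)
# a pair of sequences pairs A's axes [1, 2] with B's axes [1, 0]
assert np.tensordot(A, B, axes=([1, 2], [1, 0])).shape == (3, 5)
# axes=2 contracts the last two axes of A with the first two axes of B
C = np.zeros((3, 2, 4))
D = np.zeros((2, 4, 5))
assert np.tensordot(C, D, axes=2).shape == (3, 5)
# sum2's trick: a sum over axis 1 is a contraction with a ones vector
x = np.arange(6.0).reshape(3, 2)
assert np.allclose(np.tensordot(x, np.ones(2), axes=([1], [0])), x.sum(axis=1))
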
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index cda1b9f..d569015 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -64,6 +64,29 @@ void Device::CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
       {src}, {dst});
 }
 
+void Device::RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
+                              CopyDirection direct, bool broadcast_flag,
+                              int axis_shape, int shape_outer, int chunk,
+                              vector<int> repeats, int dst_offset, int src_offset) {
+  const char *src_data = reinterpret_cast<const char*>(src->data()) + src_offset;
+  char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + dst_offset;
+
+  for (int i = 0; i < shape_outer; i++) {
+    for (int j = 0; j < axis_shape; j++) {
+      int temp = broadcast_flag ? repeats[0] : repeats[j];
+      for (int k = 0; k < temp; k++) {
+        this->Exec(
+            [this, dst_data, src_data, direct, chunk, repeats](Context* ctx) {
+              this->CopyToFrom(dst_data, src_data, chunk, direct, ctx);
+            },
+            {src}, {dst});
+        dst_data += chunk;
+      }
+      src_data += chunk;
+    }
+  }
+}
+
 void Device::CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                                  size_t dst_offset) {
   auto direct = lang_ == kCpp ? kHostToHost : kHostToDevice;

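For readability, here is a minimal host-side sketch (plain Python with numpy, not SINGA code) of the copy pattern Device::RepeatDataToFrom is intended to implement: the source is walked in chunks, and every chunk on the repeated axis is emitted repeats[j] times (or repeats[0] when broadcasting). The shape_outer, axis_shape and chunk values correspond to the arguments computed in tensor.cc below; the result is checked against numpy.repeat.

import numpy as np

def repeat_copy(src, shape, axis, repeats):
    shape_outer = int(np.prod(shape[:axis]))   # product of dims before the axis
    axis_shape = shape[axis]                   # size of the repeated axis
    chunk = int(np.prod(shape[axis + 1:]))     # elements per slice after the axis
    dst = []
    pos = 0
    for _ in range(shape_outer):
        for j in range(axis_shape):
            temp = repeats[j] if len(repeats) > 1 else repeats[0]
            for _ in range(temp):
                dst.extend(src[pos:pos + chunk])
            pos += chunk
    return dst

x = np.arange(12).reshape(2, 3, 2)
out = repeat_copy(x.ravel().tolist(), x.shape, axis=1, repeats=[1, 2, 3])
assert (np.array(out).reshape(2, 6, 2) == np.repeat(x, [1, 2, 3], axis=1)).all()
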
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index de0d7d2..22541df 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -22,6 +22,8 @@
 #include "./tensor_math_opencl.h"
 #include <utility>
 
+#define Noaxis 9999
+
 namespace singa {
 
 Tensor::~Tensor() {
@@ -214,6 +216,20 @@ void Tensor::CopyData(const Tensor &src) {
   }
 }
 
+void Tensor::RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &src) {
+  if(axis == Noaxis) {
+    CHECK_EQ(Size(), src.Size()*total_repeats);
+  } else {
+    CHECK_EQ(Size(), src.Size()*total_repeats/src.shape()[axis]);
+  }
+
+  CHECK(block_ != nullptr);
+  // Do repeat only if the src's block is already initialized.
+  if (src.block_ != nullptr) {
+    singa::RepeatDataToFrom(false, repeats, axis, this, src, Size(), 0, 0);
+  }
+}
+
 void Tensor::FromProto(const singa::TensorProto &proto) {
   if (block_ != nullptr && block_->DecRefCount() == 0)
     device_->FreeBlock(block_);
@@ -329,6 +345,29 @@ Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
   return t;
 }
 
+Tensor Tensor::Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device) {
+  if (device == nullptr) device = device_;
+  Tensor t;
+  int total_repeats = 0;
+  if (axis == Noaxis) {
+    total_repeats = repeats[0];
+    t.shape_.push_back(Product(shape_)*total_repeats);
+  } else {
+    for (size_t i = 0; i < shape_[axis]; i++) {
+      if(repeats[i] < 0) {
+        LOG(FATAL) << "the repeats number is less than zero";
+      }
+      total_repeats += repeats[i];
+      t.shape_.push_back(Product(shape_)/shape_[axis]*total_repeats);
+    }
+  }
+  t.device_ = device_;
+  t.data_type_ = data_type_;
+  t.strides_.push_back(1);
+  t.RepeatData(repeats, axis, total_repeats, *this);
+  return t;
+}
+
 //yisen todo
 Tensor Tensor::T() const {
   // this function only works for 2d tensors
@@ -482,6 +521,56 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
     src_dev->CopyDataToFrom(to, from, nBytes, direct, (int)d_offset, (int)s_offset);
   }
 }
+
+void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis, 
+                      Tensor *dst, const Tensor &src, const size_t num, 
+                      const size_t dst_offset, const size_t src_offset) {
+  if (repeats.size() == 1) {
+    broadcast_flag = true;
+  }
+  if (repeats.size() > 1) {
+    if (axis == Noaxis) {
+      LOG(FATAL) << "When repeats parameter is sequence, axis cannot be None";
+    }
+  }
+  for (size_t i = 0; i < repeats.size(); i++){
+    CHECK_GE(repeats[i], 0);
+  }
+  auto width = SizeOf(src.data_type());
+  CHECK_EQ(width, SizeOf(dst->data_type()));
+  size_t nBytes = num * width;
+  auto d_offset = dst_offset * width;
+  auto s_offset = src_offset * width;
+  int chunk = width;
+  int axis_shape = 1;
+  int shape_outer = 1;
+  if (axis == Noaxis) {
+    axis_shape = 1;
+    shape_outer = Product(src.shape());
+  } else {
+    axis_shape = src.shape()[axis];
+    for (int i = 0; i < axis; i++) {
+      shape_outer *= src.shape()[i];
+    }
+    for (size_t i = axis + 1; i < src.nDim(); i++) {
+      chunk *= src.shape()[i];
+    }
+  }
+  std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
+  Block *from = src.block(), *to = dst->block();
+  if (dst_dev->lang() != src_dev->lang()) {
+    // let the non-cpp device conduct the copy op
+    if (dst_dev->lang() == kCpp) {
+      src_dev->RepeatDataToFrom(to, from, nBytes, kDeviceToHost, broadcast_flag,
+                                axis_shape, shape_outer, chunk, repeats,
+                                (int)d_offset, (int)s_offset);
+    } else if (src_dev->lang() == kCpp) {
+      dst_dev->RepeatDataToFrom(to, from, nBytes, kHostToDevice, broadcast_flag,
+                                axis_shape, shape_outer, chunk, repeats,
+                                (int)d_offset, (int)s_offset);
+    } else {
+      LOG(FATAL) << "Repeat copy between Cuda and OpenCL devices is not supported";
+    }
+  } else {
+    auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
+    src_dev->RepeatDataToFrom(to, from, nBytes, direct, broadcast_flag,
+                              axis_shape, shape_outer, chunk, repeats,
+                              (int)d_offset, (int)s_offset);
+  }
+}
 //============================================================================
 /// typedef DType according to type value.
 /// DType would be used in the code block __VA_ARGS__.
