SINGA-348 Support autograd MLP example

1. rename tensor.Tensor's attribute singa_tensor to data and add CTensor
   as an alias of singa.Tensor
2. update Operation and autograd to simplify the code. The dependency is
   recorded in a simple Counter: op -> #children ops
3. update the comments of some operations and autograd
4. make some changes to the design
5. add operation.requires_grad, which is helpful in avoiding unnecessary
   calculations
6. an open question: broadcast element-wise multiply seems unsupported,
   which causes problems when a CTensor gradient is passed to
   loss.backward

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f42d4d07
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f42d4d07
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f42d4d07

Branch: refs/heads/master
Commit: f42d4d076df55527c34fd6816580e097d908710b
Parents: 755eba6
Author: Wang Wei <[email protected]>
Authored: Sat Apr 7 23:03:26 2018 +0800
Committer: Wang Wei <[email protected]>
Committed: Thu Apr 12 16:59:48 2018 +0800

----------------------------------------------------------------------
 examples/MLP.py           |  83 ++----
 python/singa/autograd.py  | 107 ++++++++
 python/singa/engine.py    | 141 ----------
 python/singa/layer.py     |  24 +-
 python/singa/loss.py      |   8 +-
 python/singa/metric.py    |  13 +-
 python/singa/optimizer.py |  36 ++-
 python/singa/snapshot.py  |   4 +-
 python/singa/tensor.py    | 603 ++++++++++++++++++++++++-----------------
 python/singa/utils.py     |  10 +-
 10 files changed, 550 insertions(+), 479 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/examples/MLP.py
----------------------------------------------------------------------
diff --git a/examples/MLP.py b/examples/MLP.py
index 405d998..b773efb 100644
--- a/examples/MLP.py
+++ b/examples/MLP.py
@@ -1,7 +1,6 @@
-
 from singa import tensor
-from singa import engine
-from singa import singa_wrap as singa
+from singa import autograd
+from singa import optimizer
 import numpy as np
@@ -46,65 +45,39 @@ if __name__ == '__main__':
         categorical = np.reshape(categorical, output_shape)
         return categorical
 
-    label = to_categorical(label,2).astype(np.float32)
-    print 'train_data_shape:', data.shape, 'train_label_shape:', label.shape
-
-    # send training data(numpy array) to singa_tensor
-    tr_data = singa.Tensor((400, 2))
-    tr_data.CopyFloatDataFromHostPtr(data.flatten())
-
-    tr_label = singa.Tensor((400, 2))
-    tr_label.CopyFloatDataFromHostPtr(label.flatten())
-
-    w_0 = singa.Tensor((2, 3))
-    singa.Gaussian(float(0), float(0.1), w_0)
-    b_0 = singa.Tensor((1, 3))
-    b_0.SetFloatValue(float(0))
-
-    w_1 = singa.Tensor((3, 2))
-    singa.Gaussian(float(0), float(0.1), w_1)
-    b_1 = singa.Tensor((1, 2))
-    b_1.SetFloatValue(float(0))
-
-    # initialize tensor.Tensor using singa_tensor
-    inputs = tensor.Tensor(data=tr_data, requires_grad=False, grad_outlet=False)
-    target = tensor.Tensor(data=tr_label, requires_grad=False, grad_outlet=False)
+    label = to_categorical(label, 2).astype(np.float32)
+    print('train_data_shape:', data.shape)
+    print('train_label_shape:', label.shape)
 
-    weight_0 = tensor.Tensor(data=w_0, requires_grad=True, grad_outlet=True)
-    bias_0 = tensor.Tensor(data=b_0, requires_grad=True, grad_outlet=True)
+    inputs = tensor.Tensor(data=data, requires_grad=False)
+    target = tensor.Tensor(data=label, requires_grad=False)
 
-    weight_1 = tensor.Tensor(data=w_1, requires_grad=True, grad_outlet=True)
-    bias_1 = tensor.Tensor(data=b_1, requires_grad=True, grad_outlet=True)
+    w0 = tensor.Tensor(shape=(2, 3), requires_grad=True,
+                       stores_grad=True)
+    w0.gaussian(0.0, 0.1)
+    b0 = tensor.Tensor(shape=(1, 3), requires_grad=True, stores_grad=True)
+    b0.set_value(0.0)
 
-    def update(lr, param, grad):
-        '''
-        To update the value of parameters
-        Args:
-            param: tensor.Tensor
-            grad: singa_tensor
-        '''
-        grad *= float(lr)
-        assert param.singa_tensor.shape() == grad.shape()
-        param.singa_tensor = singa.__sub__(param.singa_tensor, grad)
-        return
+    w1 = tensor.Tensor(shape=(3, 2), requires_grad=True, stores_grad=True)
+    w1.gaussian(0.0, 0.1)
+    b1 = tensor.Tensor(shape=(1, 2), requires_grad=True, stores_grad=True)
+    b1.set_value(0.0)
 
+    sgd = optimizer.SGD(0.05)
 
     # training process
-    lr = 0.05
     for i in range(1001):
-        outputs = tensor.dot(inputs, weight_0)
-        outputs = tensor.add_bias(bias_0, outputs)
-        outputs = tensor.relu(outputs)
-        outputs = tensor.dot(outputs, weight_1)
-        outputs = tensor.add_bias(bias_1, outputs)
-        outputs = tensor.softmax(outputs)
-
-        loss = tensor.cross_entropy(outputs, target)
-
-        grads = float(1)
-        in_grads = engine.gradients(loss, grads)
+        x = tensor.matmul(inputs, w0)
+        x = tensor.add_bias(x, b0)
+        x = tensor.relu(x)
+        x = tensor.matmul(x, w1)
+        x = tensor.add_bias(x, b1)
+        x = tensor.softmax(x)
+        loss = tensor.cross_entropy(x, target)
+        in_grads = autograd.backward(loss)
         for param in in_grads:
-            update(lr, param, in_grads[param])
+            sgd.apply(0, in_grads[param], param, '')
         if (i % 100 == 0):
-            print 'training loss = ', float(tensor.To_Numpy(loss.singa_tensor))
\ No newline at end of file
+            print('training loss = ', tensor.to_numpy(loss)[0])
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
new file mode 100644
index 0000000..399fa19
--- /dev/null
+++ b/python/singa/autograd.py
@@ -0,0 +1,107 @@
+from collections import Counter, deque
+from singa import tensor
+
+
+
+def infer_dependency(op):
+    '''
+    Infer the dependency of all operations with the
+    given op as the last operation.
+
+    Operation A depends on B if A uses the output(s) of B.
+
+    Args:
+        op: an Operation instance, e.g. the loss operation.
+
+    Return:
+        a Counter instance with the operation as the key,
+        and the number of operations that depend on it as the value
+    '''
+    # dependency = {}
+    dependency_count = Counter()
+    queue = deque([op])
+    while len(queue) > 0:
+        cur_op = queue.pop()
+        for src_op, _, _, _ in cur_op.src:
+            if src_op not in dependency_count:
+                # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
+                dependency_count[src_op] = 0
+                queue.append(src_op)
+            # y_idx = src_op.y_id2idx[x_id]
+            # dependency[src_op][y_idx][cur_op] += 1
+            dependency_count[src_op] += 1
+    return dependency_count
+
+
+def backward(y, dy=None):
+    '''
+    Run the backward propagation starting at y.
+
+    Args:
+        y: a Tensor instance, usually the loss
+        dy: a number or a Tensor instance, for the gradient of the
+            objective/loss w.r.t y, usually 1.0
+
+    Return:
+        a dictionary storing the gradient tensors of all tensors
+        whose stores_grad is true (e.g.
+        parameter tensors)
+    '''
+    dependency = infer_dependency(y.creator)
+    assert y.size() == 1, 'y must be a Tensor with a single value;'\
+        'size of y is % d' % y.size()
+
+    # by default the dy is a tensor with 1.0 for each sample;
+    if dy is None:
+        dy = float(1.0)
+    elif isinstance(dy, tensor.Tensor):
+        dy = dy.data
+    else:
+        dy = float(dy)
+
+    # ready is a queue of (operation, dy list)
+    ready = deque([(y.creator, (dy,))])
+    not_ready = {}  # mapping: op->[dy]
+    gradients = {}  # mapping: x->dx if x.stores_grad
+
+    while len(ready) > 0:
+        op, dys = ready.pop()
+        # if not isinstance(op, tensor.Dummy):
+        dxs = op._do_backward(*dys)
+        # TODO src and dx must match
+        assert len(op.src) == len(dxs), \
+            'the number of src ops (=%d) and dx (=%d) not match' \
+            % (len(op.src), len(dxs))
+        for (src_op, x_id, param, x_stores_grad), dx in zip(op.src, dxs):
+            # x_id is the python id of one input arg of op, denoted as x.
+            # y_idx (below) is the index of x among the outputs of src_op.
+            # not_ready[src_op][y_idx] records the intermediate gradient
+            # of the y_idx'th output of src_op. 'intermediate gradient'
+            # indicates that if this output is used in multiple child
+            # operations, then we have to add the gradient (dx) from all
+            # these child operations. When src_op is ready, it means that
+            # the gradients of all its outputs are available, i.e. all
+            # child operations have finished their backward passes.
+            y_idx = src_op.y_ids[x_id]
+            if src_op not in not_ready:
+                # src_op may have multiple outputs
+                not_ready[src_op] = [None for _ in src_op.y_ids]
+                not_ready[src_op][y_idx] = dx
+            else:
+                dxs = not_ready[src_op]
+                if dxs[y_idx] is None:
+                    dxs[y_idx] = dx
+                else:
+                    # add the gradient from another child operation that
+                    # uses the y_idx'th output of src_op as input arg
+                    dxs[y_idx] += dx
+            if x_stores_grad:
+                # store the gradient for final return, e.g.
if x is parameter + gradient = not_ready[src_op][y_idx] + gradients[param] = tensor.Tensor(device=gradient.device, data=gradient, requires_grad=False) + dependency[src_op] -= 1 + if src_op.requires_grad is True: + if dependency[src_op] == 0: + ready.append((src_op, not_ready[src_op])) + del not_ready[src_op] + + return gradients http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/engine.py ---------------------------------------------------------------------- diff --git a/python/singa/engine.py b/python/singa/engine.py deleted file mode 100644 index a326ab4..0000000 --- a/python/singa/engine.py +++ /dev/null @@ -1,141 +0,0 @@ -from collections import Counter -from singa import singa_wrap as singa -from singa import tensor - - -class GradientFlowController(object): - ''' - Control backward gradients flow by running the method, run_backward() - - ''' - def __init__(self): - pass - - def dependency_check(self, function): - ''' - Compute how many times each 'previous_function'(Operation object) influent its next_functions - - though which outputs - - Arg: - function: a Operation object which is the termination - - Return: - dependencies: a dictionary recording dependencies among functions(Operations) - seen: a set recording all functions(Operations) observed - - ''' - dependencies = {} - seen = {function} - queue = [function] - while len(queue) > 0: - f = queue.pop() - for previous_function, Arg_ID in f.previous_functions: - if previous_function not in dependencies: - dependencies[previous_function] = [Counter() for _ in previous_function.output_ids] - output_idx = previous_function.output_ids[Arg_ID] - dependencies[previous_function][output_idx][f] += 1 - if previous_function not in seen: - queue.append(previous_function) - seen.add(previous_function) - return dependencies, seen - - def dependency_release(self, dependencies, previous_function, function, Arg_ID): - ''' - To release dependency: if previous_function receive one gradient though its - - output(can be found by Arg_ID) from function, the corresponding dependency counter - - minus one. - - ''' - deps = dependencies[previous_function] - output_idx = previous_function.output_ids[Arg_ID] - output_deps = deps[output_idx] - output_deps[function] -= 1 - if output_deps[function] == 0: - del output_deps[function] - return output_idx - - def is_ready_for_backward(self, dependencies, function): - ''' - Check if a function(Operation) is ready for backward. - - Return: Trur or Flase - - ''' - for deps in dependencies[function]: - if len(deps) > 0: - return False - return True - - def run_backward(self, Tensor, grad): - ''' - Run the autograd process. 
- - Args: - Tensor: the object tensor to optimize, usually the loss - grad: received gradients - - Return: - gradients: a dictionary recording the gradients - - ''' - ready = [(Tensor.creator, (grad,))] - not_ready = {} - - dependencies, seen = self.dependency_check(Tensor.creator) - - while len(ready) > 0: - function, grad = ready.pop() - gradient_inputs = function._do_backward(*grad) - for (previous_function, Arg_ID), gradient_input in zip(function.previous_functions, gradient_inputs): - if not previous_function.requires_grad: - continue - - output_index = self.dependency_release(dependencies, previous_function, function, Arg_ID) - is_ready = self.is_ready_for_backward(dependencies, previous_function) - - if is_ready: - if previous_function in not_ready: - previous_functions_gradients = not_ready[previous_function] - if not previous_functions_gradients[output_index]: - previous_functions_gradients[output_index] = gradient_input - else: - previous_functions_gradients[output_index] = \ - singa.__add__(previous_functions_gradients[output_index], gradient_input) - del not_ready[previous_function] - else: - assert output_index == 0 - previous_functions_gradients = (gradient_input,) - ready.append((previous_function, previous_functions_gradients)) - else: - if previous_function in not_ready: - previous_functions_gradients = not_ready[previous_function] - else: - previous_functions_gradients = [None for _ in previous_function.output_ids] - - if not previous_functions_gradients[output_index]: - previous_functions_gradients[output_index] = gradient_input - else: - previous_functions_gradients[output_index] = \ - singa.__add__(previous_functions_gradients[output_index], gradient_input) - - not_ready[previous_function] = previous_functions_gradients - - gradients = {} - for f in seen: - if isinstance(f, tensor.Initializer): - if f.Tensor.grad_outlet is True: - gradients[f.Tensor] = f.grads - f.grads = f.init.Clone() - return gradients - - -def gradients(Tensor, out_gradient): - ''' - Compute gradients of Tensor. 
- - ''' - Controller = GradientFlowController() - return Controller.run_backward(Tensor, out_gradient) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/layer.py ---------------------------------------------------------------------- diff --git a/python/singa/layer.py b/python/singa/layer.py index 348bedc..516e778 100644 --- a/python/singa/layer.py +++ b/python/singa/layer.py @@ -92,6 +92,7 @@ class Layer(object): Args: name (str): layer name ''' + def __init__(self, name, conf=None, **kwargs): if conf is None: self.layer = None # layer converted by swig @@ -212,13 +213,13 @@ class Layer(object): else: flag = model_pb2.kEval if type(x) is list: - xs = [t.singa_tensor for t in x] + xs = [t.data for t in x] y = self.layer.ForwardWithMultInputs(flag, xs) else: assert isinstance(x, tensor.Tensor), \ - 'input of %s (type:%s) must be a Tensor or Tensor list'\ - % (self.name, type(x).__name__) - y = self.layer.Forward(flag, x.singa_tensor) + 'input of %s (type:%s) must be a Tensor or Tensor list'\ + % (self.name, type(x).__name__) + y = self.layer.Forward(flag, x.data) if type(y) is tuple: return tensor.from_raw_tensors(y) else: @@ -242,13 +243,13 @@ class Layer(object): flag = model_pb2.kEval if type(dy) == list: - dys = [t.singa_tensor for t in dy] + dys = [t.data for t in dy] ret = self.layer.BackwardWithMultInputs(flag, dys) else: assert isinstance(dy, tensor.Tensor), \ - 'input of %s (type:%s) must be a Tensor or Tensor list'\ - % (self.name, type(dy).__name__) - dys = dy.singa_tensor + 'input of %s (type:%s) must be a Tensor or Tensor list'\ + % (self.name, type(dy).__name__) + dys = dy.data ret = self.layer.Backward(flag, dys) if type(ret[0]) is tuple: dxs = tensor.from_raw_tensors(ret[0]) @@ -279,6 +280,7 @@ class Dummy(Layer): '''A dummy layer that does nothing but just forwards/backwards the data (the input/output is a single tensor). ''' + def __init__(self, name, input_sample_shape=None): super(Dummy, self).__init__(name) self.output_sample_shape = input_sample_shape @@ -586,6 +588,7 @@ class BatchNormalization(Layer): class L2Norm(Layer): '''Normalize each sample to have L2 norm = 1''' + def __init__(self, name, input_sample_shape, epsilon=1e-8): super(L2Norm, self).__init__(name) self.y = None @@ -863,6 +866,7 @@ class Split(Layer): input_sample_shape: includes a single integer for the input sample feature size. 
''' + def __init__(self, name, num_output, input_sample_shape=None): self.num_output = num_output self.in_shape = input_sample_shape @@ -1099,7 +1103,7 @@ class RNN(Layer): for t in inputs: assert isinstance(t, tensor.Tensor), \ 'input must be py Tensor %s' % (type(t)) - tensors.append(t.singa_tensor) + tensors.append(t.data) if type(flag) is bool: if flag: flag = model_pb2.kTrain @@ -1138,7 +1142,7 @@ class RNN(Layer): tensors = [] for t in grad: assert isinstance(t, tensor.Tensor), 'grad must be py Tensor' - tensors.append(t.singa_tensor) + tensors.append(t.data) ret = self.layer.BackwardWithMultInputs(flag, tensors) return tensor.from_raw_tensors(ret[0]), tensor.from_raw_tensors(ret[1]) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/loss.py ---------------------------------------------------------------------- diff --git a/python/singa/loss.py b/python/singa/loss.py index 2c86146..fd7f157 100644 --- a/python/singa/loss.py +++ b/python/singa/loss.py @@ -72,7 +72,7 @@ class Loss(object): else: flag = model_pb2.kEval return tensor.from_raw_tensor( - self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor)) + self.swig_loss.Forward(flag, x.data, y.data)) def backward(self): ''' @@ -97,7 +97,7 @@ class Loss(object): else: flag = model_pb2.kEval - return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor) + return self.swig_loss.Evaluate(flag, x.data, y.data) class SoftmaxCrossEntropy(Loss): @@ -122,6 +122,7 @@ class SigmoidCrossEntropy(Loss): '''This loss evaluates the cross-entropy loss between the prediction and the truth values with the prediction probability generated from Sigmoid. ''' + def __init__(self, epsilon=1e-8): super(SigmoidCrossEntropy, self).__init__() self.truth = None @@ -146,7 +147,7 @@ class SigmoidCrossEntropy(Loss): np = 1 - p p += (p < self.epsilon) * self.epsilon np += (np < self.epsilon) * self.epsilon - l = (y-1) * tensor.log(np) - y * tensor.log(p) + l = (y - 1) * tensor.log(np) - y * tensor.log(p) # TODO(wangwei): add unary operation -Tensor return tensor.average(l, axis=1) @@ -177,6 +178,7 @@ class SquaredError(Loss): It is implemented using Python Tensor operations. ''' + def __init__(self): super(SquaredError, self).__init__() self.err = None http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/metric.py ---------------------------------------------------------------------- diff --git a/python/singa/metric.py b/python/singa/metric.py index 893b139..92f115c 100644 --- a/python/singa/metric.py +++ b/python/singa/metric.py @@ -68,7 +68,7 @@ class Metric(object): a tensor of floats, one per sample ''' return tensor.from_raw_tensor( - self.swig_metric.Forward(x.singa_tensor, y.singa_tensor)) + self.swig_metric.Forward(x.data, y.data)) def evaluate(self, x, y): '''Compute the averaged metric over all samples. @@ -79,7 +79,7 @@ class Metric(object): Returns: a float value for the averaged metric ''' - return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor) + return self.swig_metric.Evaluate(x.data, y.data) class Accuracy(Metric): @@ -87,6 +87,7 @@ class Accuracy(Metric): It calls the C++ functions to do the calculation. 
''' + def __init__(self): self.swig_metric = singa.Accuracy() @@ -96,6 +97,7 @@ class Precision(Metric): Compute the precision against the groundtruth labels ''' + def __init__(self, top_k): self.top_k = top_k @@ -119,7 +121,8 @@ class Precision(Metric): x_np = tensor.to_numpy(x) y_np = tensor.to_numpy(y) - pred_np = np.argsort(-x_np)[:, 0:self.top_k] # Sort in descending order + # Sort in descending order + pred_np = np.argsort(-x_np)[:, 0:self.top_k] prcs_np = np.zeros(pred_np.shape[0], dtype=np.float32) @@ -157,6 +160,7 @@ class Recall(Metric): Compute the recall against the groundtruth labels ''' + def __init__(self, top_k): self.top_k = top_k @@ -180,7 +184,8 @@ class Recall(Metric): x_np = tensor.to_numpy(x) y_np = tensor.to_numpy(y) - pred_np = np.argsort(-x_np)[:, 0:self.top_k] # Sort in descending order + # Sort in descending order + pred_np = np.argsort(-x_np)[:, 0:self.top_k] recall_np = np.zeros(pred_np.shape[0], dtype=np.float32) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/optimizer.py ---------------------------------------------------------------------- diff --git a/python/singa/optimizer.py b/python/singa/optimizer.py index a86c537..975641a 100644 --- a/python/singa/optimizer.py +++ b/python/singa/optimizer.py @@ -67,6 +67,7 @@ class Optimizer(object): constraint would be applied inside apply_with_lr(). Users can also apply constraint outside. ''' + def __init__(self, lr=None, momentum=None, weight_decay=None, regularizer=None, constraint=None): self.lr = lr @@ -211,11 +212,12 @@ class SGD(Optimizer): def apply_with_lr(self, epoch, lr, grad, value, name, step=-1): if grad.is_empty(): return value - grad = self.apply_regularizer_constraint(epoch, value, grad, name, step) + grad = self.apply_regularizer_constraint( + epoch, value, grad, name, step) if name is not None and name in self.learning_rate_multiplier: lr = lr * self.learning_rate_multiplier[name] - self.opt.Apply(epoch, lr, name.encode(), grad.singa_tensor, - value.singa_tensor) + self.opt.Apply(epoch, lr, name.encode(), grad.data, + value.data) return value @@ -240,11 +242,12 @@ class Nesterov(Optimizer): if grad.is_empty(): return value - grad = self.apply_regularizer_constraint(epoch, value, grad, name, step) + grad = self.apply_regularizer_constraint( + epoch, value, grad, name, step) if name is not None and name in self.learning_rate_multiplier: lr = lr * self.learning_rate_multiplier[name] - self.opt.Apply(epoch, lr, name.encode(), grad.singa_tensor, - value.singa_tensor) + self.opt.Apply(epoch, lr, name.encode(), grad.data, + value.data) return value @@ -272,11 +275,12 @@ class RMSProp(Optimizer): if grad.is_empty(): return value - grad = self.apply_regularizer_constraint(epoch, value, grad, name, step) + grad = self.apply_regularizer_constraint( + epoch, value, grad, name, step) if name is not None and name in self.learning_rate_multiplier: lr = lr * self.learning_rate_multiplier[name] - self.opt.Apply(step, lr, name.encode(), grad.singa_tensor, - value.singa_tensor) + self.opt.Apply(step, lr, name.encode(), grad.data, + value.data) return value @@ -303,11 +307,12 @@ class AdaGrad(Optimizer): if grad.is_empty(): return value - grad = self.apply_regularizer_constraint(epoch, value, grad, name, step) + grad = self.apply_regularizer_constraint( + epoch, value, grad, name, step) if name is not None and name in self.learning_rate_multiplier: lr = lr * self.learning_rate_multiplier[name] - self.opt.Apply(epoch, lr, name.encode(), grad.singa_tensor, - value.singa_tensor) + 
self.opt.Apply(epoch, lr, name.encode(), grad.data, + value.data) return value @@ -349,7 +354,8 @@ class Adam(Optimizer): self.t += 1 self.last_step = step self.last_epoch = epoch - grad = self.apply_regularizer_constraint(epoch, value, grad, name, step) + grad = self.apply_regularizer_constraint( + epoch, value, grad, name, step) if name is not None and name in self.learning_rate_multiplier: lr = lr * self.learning_rate_multiplier[name] if name not in self.m or name not in self.v: @@ -389,7 +395,7 @@ class CppRegularizer(Regularizer): self.reg.Setup(conf.SerializeToString()) def apply(self, epoch, value, grad, step=-1): - self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor) + self.reg.Apply(epoch, value.data, grad.data) return grad @@ -429,7 +435,7 @@ class CppConstraint(Constraint): self.constraint.Setup(conf.SerializeToString()) def apply(self, epoch, value, grad, step=-1): - self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor, + self.constraint.Apply(epoch, value.data, grad.data, step) return grad http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/snapshot.py ---------------------------------------------------------------------- diff --git a/python/singa/snapshot.py b/python/singa/snapshot.py index a4ac988..7c97f0f 100644 --- a/python/singa/snapshot.py +++ b/python/singa/snapshot.py @@ -36,10 +36,12 @@ from builtins import object from . import singa_wrap as singa from . import tensor + class Snapshot(object): ''' Class and member functions for singa::Snapshot. ''' + def __init__(self, f, mode, buffer_size=10): '''Snapshot constructor given file name and R/W mode. @@ -57,7 +59,7 @@ class Snapshot(object): param_name (string): name of the parameter param_val (Tensor): value tensor of the parameter ''' - self.snapshot.Write(param_name.encode(), param_val.singa_tensor) + self.snapshot.Write(param_name.encode(), param_val.data) def read(self): '''Call read method to load all (param_name, param_val) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/tensor.py ---------------------------------------------------------------------- diff --git a/python/singa/tensor.py b/python/singa/tensor.py index df29cf5..f4801a4 100644 --- a/python/singa/tensor.py +++ b/python/singa/tensor.py @@ -63,60 +63,60 @@ from functools import reduce from .proto import core_pb2 from . import singa_wrap as singa -from . import device as pydevice +from .device import get_default_device int32 = core_pb2.kInt float32 = core_pb2.kFloat32 - +CTensor = singa.Tensor class Tensor(object): - '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor - - The three arguments are three attributes of the Tensor. + '''Python Tensor, which wraps a swig converted Tensor from CPP Tensor. Args: - shape (list<int>): a list of integers for the tensor shape. If shape is - not specified, the created tensor is called a dummy tensor. - device: a swig converted Device instance using the device moduel . If it - is None, then the default host device would be used. - dtype: data type. currently, most operations only accept kFloat32. - data: a singa_tensor recording input data. - creator: a Operation object which generate this tensor. - requires_grad: a bool recording if the creator of tensor require gradient. - grad_outlet: a bool recording if the tensor is a outlet for gradient. 
- - ''' - def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32, data=None, creator=None, requires_grad=True, - grad_outlet=False): - if shape is None: - # call constructor of singa::Tensor - self.singa_tensor = singa.Tensor() + shape (tuple<int>): a tuple of integers for the tensor shape. If shape + is not specified, the created tensor is called a dummy tensor. + device: a swig device. If None, the default host device is used. + dtype: data type. currently, most operations only accept float32. + data: a numpy array or swig tensor. + requires_grad: boolean indicator for computing the gradient. + stores_grad: boolean indicator for storing and returning the gradient. + Some intermediate tensors' gradient can be released + during the backward propagation. A tensor may require + grad but not store grad; But if a tensor stores grad + then it must require grad. + ''' + + def __init__(self, shape=(), device=None, dtype=float32, + data=None, requires_grad=True, stores_grad=False, + creator=None): + if device is None: + device = get_default_device() + if isinstance(data, np.ndarray): + self.data = CTensor(list(data.shape), device, dtype) + copy_from_numpy(self.data, data) + elif isinstance(data, CTensor): + self.data = data + assert data.device == device, 'not the same device' else: - assert isinstance(shape, tuple), 'shape should be tuple' - if device is None: - device = pydevice.get_default_device() - self.singa_tensor = singa.Tensor(list(shape), device, dtype) - else: - self.singa_tensor = singa.Tensor(list(shape), device, dtype) - if data is not None: - self.singa_tensor = data - if creator is None: - creator = Initializer(self, requires_grad) + self.data = CTensor(list(shape), device, dtype) - self.shape = tuple(self.singa_tensor.shape()) - self.device = self.singa_tensor.device() - self.dtype = self.singa_tensor.data_type() - - self.creator = creator - self.grad_outlet = grad_outlet + self.shape = tuple(self.data.shape()) + self.device = device + self.dtype = self.data.data_type() + self.requires_grad = requires_grad + self.stores_grad = stores_grad + if creator is None: + self.creator = Dummy(self) + else: + self.creator = creator def ndim(self): ''' Returns: the number of dimensions of the tensor. ''' - return self.singa_tensor.nDim() + return self.data.nDim() def is_empty(self): ''' @@ -130,21 +130,21 @@ class Tensor(object): Returns: True if the internal data is transposed; otherwise False. ''' - return self.singa_tensor.transpose() + return self.data.transpose() def size(self): # TODO(wangwei) compute size ''' Returns: the number of elements of the tensor. ''' - return self.singa_tensor.Size() + return self.data.Size() def memsize(self): ''' Returns: the number of Bytes allocated for this tensor. ''' - return self.singa_tensor.MemSize() + return self.data.MemSize() def reshape(self, shape): '''Change the tensor shape. @@ -156,7 +156,7 @@ class Tensor(object): assert product(self.shape) == product(shape), \ 'product of shape should be equal' self.shape = shape - self.singa_tensor.Reshape(list(shape)) + self.data.Reshape(list(shape)) def reset_like(self, t): '''Reset the shape, dtype and device as the given tensor. 
@@ -164,7 +164,7 @@ class Tensor(object): Args: t (Tensor) ''' - self.singa_tensor.ResetLike(t.singa_tensor) + self.data.ResetLike(t.data) self.shape = t.shape self.device = t.device self.dtype = t.dtype @@ -175,7 +175,7 @@ class Tensor(object): Args: dtype: - self.singa_tensor.AsType(dtype) + self.data.AsType(dtype) ''' def to_device(self, device): @@ -184,28 +184,28 @@ class Tensor(object): Args: device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU ''' - self.singa_tensor.ToDevice(device) + self.data.ToDevice(device) self.device = device def to_host(self): '''Move the tensor data onto the default host CppCPU device. ''' - self.singa_tensor.ToHost() - self.device = pydevice.default_device + self.data.ToHost() + self.device = get_default_device() def l2(self): ''' Returns: the L2 norm. ''' - return self.singa_tensor.L2() + return self.data.L2() def l1(self): ''' Returns: the L1 norm. ''' - return self.singa_tensor.L1() + return self.data.L1() def set_value(self, x): '''Set all elements of the tensor to be the give value. @@ -215,7 +215,7 @@ class Tensor(object): ''' # assert type(x) == float, 'set value only accepts float input' # if isinstance(x, float): - self.singa_tensor.SetFloatValue(float(x)) + self.data.SetFloatValue(float(x)) def copy_from_numpy(self, np_array, offset=0): ''' Copy the data from the numpy array. @@ -229,9 +229,9 @@ class Tensor(object): np_array = np_array.flatten() dt = np_array.dtype if dt == np.float32: - self.singa_tensor.CopyFloatDataFromHostPtr(np_array) + self.data.CopyFloatDataFromHostPtr(np_array) elif dt == np.int or dt == np.int32: - self.singa_tensor.CopyIntDataFromHostPtr(np_array) + self.data.CopyIntDataFromHostPtr(np_array) else: print('Not implemented yet for ', dt) @@ -242,14 +242,14 @@ class Tensor(object): t (Tensor): source Tensor. ''' assert isinstance(t, Tensor), 't must be a singa Tensor instance' - self.singa_tensor.CopyData(t.singa_tensor) + self.data.CopyData(t.data) def clone(self): ''' Returns: a new Tensor which does deep copy of this tensor ''' - return _call_singa_func(self.singa_tensor.Clone) + return _call_singa_func(self.data.Clone) def T(self): ''' shallow copy, negate the transpose field. @@ -258,12 +258,12 @@ class Tensor(object): a new Tensor which shares the underlying data memory (shallow copy) but is marked as a transposed version of this tensor. ''' - return _call_singa_func(self.singa_tensor.T) + return _call_singa_func(self.data.T) def copy(self): '''shallow copy calls copy constructor of singa::Tensor ''' - return _call_singa_func(singa.Tensor, self.singa_tensor) + return _call_singa_func(CTensor, self.data) def deepcopy(self): '''Same as clone(). @@ -279,7 +279,7 @@ class Tensor(object): Args: p (float): with probability p, each element is sample to 1. ''' - singa.Bernoulli(float(p), self.singa_tensor) + singa.Bernoulli(float(p), self.data) def gaussian(self, mean, std): '''Generate a value for each element following a Gaussian distribution. @@ -288,7 +288,7 @@ class Tensor(object): mean (float): mean of the distribution std (float): standard variance of the distribution ''' - singa.Gaussian(float(mean), float(std), self.singa_tensor) + singa.Gaussian(float(mean), float(std), self.data) def uniform(self, low, high): '''Generate a value for each element following a uniform distribution. 
@@ -297,7 +297,7 @@ class Tensor(object): low (float): the lower bound high (float): the hight bound ''' - singa.Uniform(float(low), float(high), self.singa_tensor) + singa.Uniform(float(low), float(high), self.data) def add_column(self, v): '''Add a tensor to each column of this tensor. @@ -305,7 +305,7 @@ class Tensor(object): Args: v (Tensor): a Tensor to be added as a column to this tensor. ''' - singa.AddColumn(v.singa_tensor, self.singa_tensor) + singa.AddColumn(v.data, self.data) def add_row(self, v): '''Add a tensor to each row of this tensor. @@ -313,7 +313,7 @@ class Tensor(object): Args: v (Tensor): a Tensor to be added as a row to this tensor. ''' - singa.AddRow(v.singa_tensor, self.singa_tensor) + singa.AddRow(v.data, self.data) def div_column(self, v): '''Divide each column of this tensor by v. @@ -321,7 +321,7 @@ class Tensor(object): Args: v (Tensor): 1d tensor of the same length the column of self. ''' - singa.DivColumn(v.singa_tensor, self.singa_tensor) + singa.DivColumn(v.data, self.data) def div_row(self, v): '''Divide each row of this tensor by v. @@ -329,7 +329,7 @@ class Tensor(object): Args: v (Tensor): 1d tensor of the same length the row of self. ''' - singa.DivRow(v.singa_tensor, self.singa_tensor) + singa.DivRow(v.data, self.data) def mult_column(self, v): '''Multiply each column of this tensor by v element-wisely. @@ -337,7 +337,7 @@ class Tensor(object): Args: v (Tensor): 1d tensor of the same length the column of self. ''' - singa.MultColumn(v.singa_tensor, self.singa_tensor) + singa.MultColumn(v.data, self.data) def mult_row(self, v): '''Multiply each row of this tensor by v element-wisely. @@ -345,7 +345,7 @@ class Tensor(object): Args: v (Tensor): 1d tensor of the same length the row of self. ''' - singa.MultRow(v.singa_tensor, self.singa_tensor) + singa.MultRow(v.data, self.data) ''' python operators (+=, -=, *=, /=) for singa::Tensor unary operators @@ -358,9 +358,9 @@ class Tensor(object): x (float or Tensor): ''' if isinstance(x, Tensor): - self.singa_tensor += x.singa_tensor + self.data += x.data else: - self.singa_tensor += float(x) + self.data += float(x) return self def __isub__(self, x): @@ -371,9 +371,9 @@ class Tensor(object): ''' if isinstance(x, Tensor): - self.singa_tensor -= x.singa_tensor + self.data -= x.data else: - self.singa_tensor -= float(x) + self.data -= float(x) return self def __imul__(self, x): @@ -383,9 +383,9 @@ class Tensor(object): x (float or Tensor): ''' if isinstance(x, Tensor): - self.singa_tensor *= x.singa_tensor + self.data *= x.data else: - self.singa_tensor *= float(x) + self.data *= float(x) return self def __idiv__(self, x): @@ -395,9 +395,9 @@ class Tensor(object): x (float or Tensor): ''' if isinstance(x, Tensor): - self.singa_tensor /= x.singa_tensor + self.data /= x.data else: - self.singa_tensor /= float(x) + self.data /= float(x) return self ''' @@ -408,70 +408,70 @@ class Tensor(object): def __add__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__add__(self.singa_tensor, rhs.singa_tensor)) + singa.__add__(self.data, rhs.data)) else: return _call_singa_func(singa.AddFloat, - self.singa_tensor, rhs) + self.data, rhs) def __sub__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__sub__(self.singa_tensor, rhs.singa_tensor)) + singa.__sub__(self.data, rhs.data)) else: return _call_singa_func(singa.SubFloat, - self.singa_tensor, rhs) + self.data, rhs) def __mul__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__mul__(self.singa_tensor, 
rhs.singa_tensor)) + singa.__mul__(self.data, rhs.data)) else: return _call_singa_func(singa.MultFloat, - self.singa_tensor, rhs) + self.data, rhs) def __div__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__div__(self.singa_tensor, rhs.singa_tensor)) + singa.__div__(self.data, rhs.data)) else: return _call_singa_func(singa.DivFloat, - self.singa_tensor, rhs) + self.data, rhs) def __truediv__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__div__(self.singa_tensor, rhs.singa_tensor)) + singa.__div__(self.data, rhs.data)) else: return _call_singa_func(singa.DivFloat, - self.singa_tensor, rhs) + self.data, rhs) def __lt__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__lt__(self.singa_tensor, rhs.singa_tensor)) + singa.__lt__(self.data, rhs.data)) else: - return _call_singa_func(singa.LTFloat, self.singa_tensor, rhs) + return _call_singa_func(singa.LTFloat, self.data, rhs) def __le__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__le__(self.singa_tensor, rhs.singa_tensor)) + singa.__le__(self.data, rhs.data)) else: - return _call_singa_func(singa.LEFloat, self.singa_tensor, rhs) + return _call_singa_func(singa.LEFloat, self.data, rhs) def __gt__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__gt__(self.singa_tensor, rhs.singa_tensor)) + singa.__gt__(self.data, rhs.data)) else: - return _call_singa_func(singa.GTFloat, self.singa_tensor, rhs) + return _call_singa_func(singa.GTFloat, self.data, rhs) def __ge__(self, rhs): if isinstance(rhs, Tensor): return from_raw_tensor( - singa.__ge__(self.singa_tensor, rhs.singa_tensor)) + singa.__ge__(self.data, rhs.data)) else: - return _call_singa_func(singa.GEFloat, self.singa_tensor, rhs) + return _call_singa_func(singa.GEFloat, self.data, rhs) def __radd__(self, lhs): lhs = float(lhs) @@ -514,7 +514,7 @@ class Tensor(object): def from_raw_tensor(t): x = Tensor(t.shape(), t.device(), t.data_type()) - x.singa_tensor = t + x.data = t return x @@ -548,7 +548,7 @@ def reshape(t, s): Returns: the new Tensor ''' - return _call_singa_func(singa.Reshape, t.singa_tensor, s) + return _call_singa_func(singa.Reshape, t.data, s) def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0): @@ -562,7 +562,7 @@ def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0): dst_offset (int): offset in terms of elements to the start of dst src_offset (int): offset in terms of elements to the start of src ''' - singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size, + singa.CopyDataToFrom(dst.data, src.data, size, dst_offset, src_offset) @@ -614,9 +614,9 @@ def to_numpy(t): ''' th = to_host(t) if th.dtype == core_pb2.kFloat32: - np_array = th.singa_tensor.GetFloatValue(int(th.size())) + np_array = th.data.GetFloatValue(int(th.size())) elif th.dtype == core_pb2.kInt: - np_array = th.singa_tensor.GetIntValue(int(th.size())) + np_array = th.data.GetIntValue(int(th.size())) else: print('Not implemented yet for ', th.dtype) return np_array.reshape(th.shape) @@ -630,7 +630,7 @@ def abs(t): Returns: a new Tensor whose element y = abs(x), x is an element of t ''' - return _call_singa_func(singa.Abs, t.singa_tensor) + return _call_singa_func(singa.Abs, t.data) def exp(t): @@ -641,7 +641,7 @@ def exp(t): Returns: a new Tensor whose element y = exp(x), x is an element of t ''' - return _call_singa_func(singa.Exp, t.singa_tensor) + return _call_singa_func(singa.Exp, t.data) def log(t): @@ -652,19 +652,7 @@ def log(t): Returns: a new Tensor 
whose element y = log(x), x is an element of t ''' - return _call_singa_func(singa.Log, t.singa_tensor) - - -def relu(t): - ''' - Args: - t (Tensor): input Tensor - - Returns: - a new Tensor whose element y = x if x >0; otherwise 0; x is an element - of t - ''' - return _call_singa_func(singa.ReLU, t.singa_tensor) + return _call_singa_func(singa.Log, t.data) def sigmoid(t): @@ -675,7 +663,7 @@ def sigmoid(t): Returns: a new Tensor whose element y = sigmoid(x); x is an element of t ''' - return _call_singa_func(singa.Sigmoid, t.singa_tensor) + return _call_singa_func(singa.Sigmoid, t.data) def sign(t): @@ -686,7 +674,7 @@ def sign(t): Returns: a new Tensor whose element y = sign(x) ''' - return _call_singa_func(singa.Sign, t.singa_tensor) + return _call_singa_func(singa.Sign, t.data) def sqrt(t): @@ -697,7 +685,7 @@ def sqrt(t): Returns: a new Tensor whose element y = sqrt(x), x is an element of t ''' - return _call_singa_func(singa.Sqrt, t.singa_tensor) + return _call_singa_func(singa.Sqrt, t.data) def square(t): @@ -708,7 +696,7 @@ def square(t): Returns: a new Tensor whose element y = x * x, x is an element of t ''' - return _call_singa_func(singa.Square, t.singa_tensor) + return _call_singa_func(singa.Square, t.data) def tanh(t): @@ -719,7 +707,7 @@ def tanh(t): Returns: a new Tensor whose element y = tanh(x), x is an element of t ''' - return _call_singa_func(singa.Tanh, t.singa_tensor) + return _call_singa_func(singa.Tanh, t.data) def sum(t, axis=None): @@ -736,9 +724,9 @@ def sum(t, axis=None): ''' if axis is None: - return singa.SumAsFloat(t.singa_tensor) + return singa.SumAsFloat(t.data) else: - return _call_singa_func(singa.Sum, t.singa_tensor, axis) + return _call_singa_func(singa.Sum, t.data, axis) def pow(t, x, out=None): @@ -755,14 +743,14 @@ def pow(t, x, out=None): ''' if out is None: if isinstance(x, Tensor): - return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor) + return _call_singa_func(singa.Pow, t.data, x.data) else: - return _call_singa_func(singa.PowFloat, t.singa_tensor, x) + return _call_singa_func(singa.PowFloat, t.data, x) else: if isinstance(x, Tensor): - singa.PowWithRet(t.singa_tensor, x.singa_tensor, out.singa_tensor) + singa.PowWithRet(t.data, x.data, out.data) else: - singa.PowFloatWitRet(t.singa_tensor, x, out.singa_tensor) + singa.PowFloatWitRet(t.data, x, out.data) return out @@ -778,18 +766,16 @@ def average(t, axis=None): a float value if axis is None; otherwise, a new Tensor for the result. ''' if t.ndim() > 1: - return _call_singa_func(singa.Average, t.singa_tensor, axis) + return _call_singa_func(singa.Average, t.data, axis) else: - return singa.SumAsFloat(t.singa_tensor) / t.size() + return singa.SumAsFloat(t.data) / t.size() def softmax(t, out=None): '''Apply SoftMax for each row of the Tensor. 
- Args: t (Tensor): the input 1d or 2d tensor out (Tensor, optional): if not None, it is used to store the result - Returns: the result Tensor ''' @@ -873,9 +859,9 @@ def add(lhs, rhs, ret=None): return lhs + rhs else: if isinstance(rhs, Tensor): - singa.Add(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + singa.Add(lhs.data, rhs.data, ret.data) else: - singa.AddFloatWithRet(lhs.singa_tensor, rhs, ret.singa_tensor) + singa.AddFloatWithRet(lhs.data, rhs, ret.data) return ret @@ -896,9 +882,9 @@ def sub(lhs, rhs, ret=None): return lhs - rhs else: if isinstance(rhs, Tensor): - singa.Sub(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + singa.Sub(lhs.data, rhs.data, ret.data) else: - singa.SubFloatWithRet(lhs.singa_tensor, rhs, ret.singa_tensor) + singa.SubFloatWithRet(lhs.data, rhs, ret.data) return ret @@ -920,11 +906,11 @@ def eltwise_mult(lhs, rhs, ret=None): return lhs * rhs else: if isinstance(rhs, Tensor): - singa.EltwiseMult(lhs.singa_tensor, rhs.singa_tensor, - ret.singa_tensor) + singa.EltwiseMult(lhs.data, rhs.data, + ret.data) else: - singa.EltwiseMultFloatWithRet(lhs.singa_tensor, rhs, - ret.singa_tensor) + singa.EltwiseMultFloatWithRet(lhs.data, rhs, + ret.data) return ret @@ -946,10 +932,10 @@ def mult(A, B, C=None, alpha=1.0, beta=0.0): the result Tensor ''' if C is None: - return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor) + return _call_singa_func(singa.Mult, A.data, B.data) else: - singa.MultWithScale(alpha, A.singa_tensor, B.singa_tensor, - beta, C.singa_tensor) + singa.MultWithScale(alpha, A.data, B.data, + beta, C.data) return C @@ -970,9 +956,9 @@ def div(lhs, rhs, ret=None): return lhs / rhs else: if isinstance(rhs, Tensor): - singa.Div(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + singa.Div(lhs.data, rhs.data, ret.data) else: - singa.DivFloatWithRet(lhs.singa_tensor, rhs, ret.singa_tensor) + singa.DivFloatWithRet(lhs.data, rhs, ret.data) return ret @@ -987,7 +973,7 @@ def axpy(alpha, x, y): Returns: y ''' - singa.Axpy(float(alpha), x.singa_tensor, y.singa_tensor) + singa.Axpy(float(alpha), x.data, y.data) return y @@ -1001,7 +987,7 @@ def bernoulli(p, t): Returns: t ''' - singa.Bernoulli(float(p), t.singa_tensor) + singa.Bernoulli(float(p), t.data) return t @@ -1016,7 +1002,7 @@ def gaussian(mean, std, t): Returns: t ''' - singa.Gaussian(float(mean), float(std), t.singa_tensor) + singa.Gaussian(float(mean), float(std), t.data) return t @@ -1031,7 +1017,7 @@ def uniform(low, high, t): Returns: t ''' - singa.Uniform(float(low), float(high), t.singa_tensor) + singa.Uniform(float(low), float(high), t.data) return t @@ -1048,8 +1034,8 @@ def add_column(alpha, v, beta, M): Returns: M ''' - singa.AddColumnWithScale(float(alpha), float(beta), v.singa_tensor, - M.singa_tensor) + singa.AddColumnWithScale(float(alpha), float(beta), v.data, + M.data) return M @@ -1066,7 +1052,7 @@ def add_row(alpha, v, beta, M): Returns: M ''' - singa.AddRowWithScale(alpha, beta, v.singa_tensor, M.singa_tensor) + singa.AddRowWithScale(alpha, beta, v.data, M.data) return M @@ -1080,8 +1066,8 @@ def sum_columns(M): a new Tensor as the resulted column. ''' assert M.ndim() == 2, 'M.nDim() is supposed to be 2' - ret = Tensor((M.shape[0], 1), M.singa_tensor.device()) - singa.SumColumns(M.singa_tensor, ret.singa_tensor) + ret = Tensor((M.shape[0], 1), M.data.device()) + singa.SumColumns(M.data, ret.data) return ret @@ -1095,8 +1081,8 @@ def sum_rows(M): a new Tensor as the resulted row. 
''' assert M.ndim() == 2, 'M.nDim() is supposed to be 2' - ret = Tensor((1, M.shape[1]), M.singa_tensor.device()) - singa.SumRows(M.singa_tensor, ret.singa_tensor) + ret = Tensor((1, M.shape[1]), M.data.device()) + singa.SumRows(M.data, ret.data) return ret @@ -1110,204 +1096,331 @@ def _call_singa_func(_singa_func, *args): e.g., Tensor [singa_func](args...) ''' new_t = Tensor() - new_t.singa_tensor = _singa_func(*args) - new_t.shape = tuple(new_t.singa_tensor.shape()) - new_t.device = new_t.singa_tensor.device() - new_t.dtype = new_t.singa_tensor.data_type() + new_t.data = _singa_func(*args) + new_t.shape = tuple(new_t.data.shape()) + new_t.device = new_t.data.device() + new_t.dtype = new_t.data.data_type() return new_t -def copy_from_numpy(singa_tensor, np_array): +def copy_from_numpy(data, np_array): ''' Copy the data from the numpy array. ''' - assert np_array.size == singa_tensor.Size(), 'tensor shape should be the same' + assert np_array.size == data.Size(), \ + 'tensor shape should be the same' if not np_array.ndim == 1: np_array = np_array.flatten() dt = np_array.dtype if dt == np.float32: - singa_tensor.CopyFloatDataFromHostPtr(np_array) + data.CopyFloatDataFromHostPtr(np_array) elif dt == np.int or dt == np.int32: - singa_tensor.CopyIntDataFromHostPtr(np_array) + data.CopyIntDataFromHostPtr(np_array) else: print('Not implemented yet for ', dt) class Operation(object): ''' - Wrap normal functions such as dot to realize autograd. + An operation includes the forward and backward function of + tensor calculation. + To add a specific operation Xxxx, subclass Operation and implement + forward() and backward(). Then implement a function xxxx which creates + a Xxxx instance and calls __call__ to do forward. The autograd engine + is able to do backward propagation by calling the backward() of Xxxx + automatically. Notice that the tensors are CTensor. NOT Python Tensor. + The arguments of forward includes both CTensor instances and other + types of data; The backward function ONLY supports CTensor args. 
''' - def __init__(self, **operation_params): - pass - def __call__(self, *input): - return self._do_forward(*input) + def __call__(self, *xs): + return self._do_forward(*xs) + + def _do_forward(self, *xs): + # TODO add the pre hook + # filter out args that are not Tensor instances + tensor_xs = [x for x in xs if isinstance(x, Tensor)] - def _do_forward(self, *input): - unpacked_input = tuple(arg.singa_tensor for arg in input) - raw_output = self.forward(*unpacked_input) - if not isinstance(raw_output, tuple): - raw_output = (raw_output,) - self.needs_input_grad = tuple(arg.creator.requires_grad for arg in input) - self.requires_grad = any(self.needs_input_grad) - output = tuple(Tensor(data=data, creator=self) for data in raw_output) - self.previous_functions = [(arg.creator, id(arg)) for arg in input] - self.output_ids = {id(var): i for i, var in enumerate(output)} - return output + # need to do backward if any of its input arg needs gradient + self.requires_grad = any([x.requires_grad for x in tensor_xs]) + # src records info of every input arg that needs gradient + # the backward() function computes grad only for those arg - def _do_backward(self, grad_output): - grad_input = self.backward(grad_output) - if not isinstance(grad_input, tuple): - grad_input = (grad_input,) - return grad_input + self.src = [] + for x in tensor_xs: + if x.stores_grad: + self.src.append((x.creator, id(x), x, x.stores_grad)) + else: + self.src.append((x.creator, id(x), None, x.stores_grad)) + + # use the CTensor (data) if the input arg is Tensor + xs = tuple(x.data if isinstance(x, Tensor) else x for x in xs) + ys = self.forward(*xs) + if not isinstance(ys, tuple): + ys = (ys,) + # create Tensor based on CTensor(data); + # assume outputs are all Tensor instances + ys = tuple(Tensor(device=y.device, + data=y, + requires_grad=self.requires_grad, + creator=self) for y in ys) + # map from python id to output index + self.y_ids = {id(y): i for i, y in enumerate(ys)} + # TODO add the post hook + return ys + + def _do_backward(self, *dys): + dxs = self.backward(*dys) + if not isinstance(dxs, tuple): + dxs = (dxs,) + return dxs + + def forward(self, *xs): + '''Forward propagation. + + Args: + xs: input args consisting of CTensors and others. - def forward(self, *input): + Returns: + CTensor instance(s) + ''' raise NotImplementedError - def backward(self, *grad_output): + def backward(self, *dys): + ''' Backward propagation. + + Args: + dys: input args consisting of CTensors. + + Returns: + CTensor instance(s) + ''' raise NotImplementedError -class Initializer(Operation): - ''' - For Tensor without creator, Initializer can act as its creator. - It is commonly used in feeding training data or initialize parameters like weights and bias. 
+class Dummy(Operation): + '''Dummy operation whice serves as a placehoder for autograd + Args: + name(string): set it for debug ''' - def __init__(self, Tensor, requires_grad): - self.Tensor = Tensor - self.output_ids = {id(Tensor): 0} - self.previous_functions = [] - self.requires_grad = requires_grad - shape = self.Tensor.singa_tensor.shape() - self.init = singa.Tensor(list(shape)) - copy_from_numpy(self.init, np.zeros(shape=shape, dtype=np.float32)) - self.grads = self.init.Clone() - def _do_forward(self): - raise NotImplementedError - - def _do_backward(self, *dy): - assert len(dy) == 1 - self.grads = singa.__add__(self.grads, dy[0]) - return tuple() + def __init__(self, tensor, name=None): + self.name = name + self.src = [] + self.y_ids = {id(tensor): 0} + self.requires_grad = False class ReLU(Operation): + def forward(self, x): ''' - forward function for ReLU Operation. + Args: + x(CTensor): input tensor + Returns: + a new CTensor whose element y = x if x >= 0; otherwise 0; ''' - self.input = (x,) + self.input = x return singa.ReLU(x) def backward(self, dy): ''' - backward function for ReLU Operation. + Args: + dy(CTensor): dL / dy + + Returns: + dx(CTensor): dL / dx = dy if x >= 0; otherwise 0; ''' - dx = singa.GTFloat(self.input[0], 0.0) + dx = singa.GTFloat(self.input, 0.0) return singa.__mul__(dy, dx) + + def relu(x): return ReLU()(x)[0] -class Dot(Operation): +class Matmul(Operation): + '''For matrix multiplication''' + def forward(self, x, w): - ''' - forward function for Dot Operation. + '''Do forward propgation. + + Store the x(or w) if w(or x) requires gradient. + Args: + x (CTensor): matrix + w (CTensor): matrix + + Returns: + a CTensor for the result ''' self.input = (x, w) return singa.Mult(x, w) def backward(self, dy): ''' - backward function for Dot Operation. + Args: + dy (CTensor): data for the dL / dy, L is the loss + Returns: + a tuple for (dx, dw) ''' return singa.Mult(dy, self.input[1].T()), singa.Mult(self.input[0].T(), dy) -def dot(x, w): - return Dot()(x, w)[0] -class Add_Bias(Operation): - def forward(self, b, x): +def matmul(x, w): + return Matmul()(x, w)[0] + + +class AddBias(Operation): + ''' + Add Bias to each row / column of the Tensor, depending on the parameter axis. + ''' + def __init__(self, axis=0): + ''' + To indicate the calculation axis, 0 for row, 1 for column. + + Args: + axis: 0 or 1, default is 0. + ''' + self.axis = axis + + def forward(self, x, b): ''' - forward function for Add_Bias Operation. + Args: + x: matrix. + b: bias to be added. + Return: + the result Tensor ''' - singa.AddRow(b, x) + if self.axis == 0: + singa.AddRow(b, x) + elif self.axis == 1: + singa.AddColumn(b, x) return x def backward(self, dy): ''' - backward function for Add_Bias Operation. + Args: + dy (CTensor): data for the dL / dy, L is the loss. + Return: + a tuple for (db, dx), db is data for dL / db, dx is data + for dL / dx. ''' - return singa.Sum(dy, 0), dy -def add_bias(b, x): - return Add_Bias()(b, x)[0] + if self.axis == 0: + return dy, singa.Sum(dy, 0) + elif self.axis == 1: + return dy, singa.Sum(dy, 0) + + +def add_bias(x, b, axis=0): + return AddBias(axis)(x, b)[0] class SoftMax(Operation): + ''' + Apply SoftMax for each row of the Tensor or each column of the Tensor + according to the parameter axis. + ''' + def __init__(self, axis=0): + self.axis = axis + def forward(self, x): ''' - forward function for SoftMax Operation. 
+        Args:
+            x(data): the input 1d or 2d tensor
+
+        Returns:
+            the result Tensor
         '''
-        self.output = (singa.SoftMax(x),)
-        return self.output[0]
+        if self.axis == 1:
+            x = x.T()
+        self.output = singa.SoftMax(x)
+        if self.axis == 0:
+            return self.output
+        elif self.axis == 1:
+            return self.output.T()
 
     def backward(self, dy):
         '''
-        backward function for SoftMax Operation.
+        Args:
+            dy (CTensor): data for the dL / dy, L is the loss
+
+        Returns:
+            dx (CTensor): data for the dL / dx, L is the loss,
+            x is the input of the current Operation
         '''
-        # calculations are made on numpy
-        grad = To_Numpy(dy)
-        output = To_Numpy(self.output[0])
+        # calculations are made on numpy arrays
+        if self.axis == 1:
+            dy = dy.T()
+        grad = ctensor2numpy(dy)
+        output = ctensor2numpy(self.output)
         out_1 = np.einsum('ki,ki->ki', grad, output)
         medium_out = np.einsum('ki,kj->kij', output, output)
         out_2 = np.einsum('kij,kj->ki', medium_out, grad)
         out = out_1 - out_2
-        out_singa = singa.Tensor(out_1.shape)
-        out_singa.CopyFloatDataFromHostPtr(out.flatten())
-        return out_singa
-def softmax(x):
-    return SoftMax()(x)[0]
+        dx = CTensor(out_1.shape)
+        dx.CopyFloatDataFromHostPtr(out.flatten())
+        if self.axis == 0:
+            return dx
+        elif self.axis == 1:
+            return dx.T()
+
+
+def softmax(x, axis=0):
+    return SoftMax(axis)(x)[0]
 
 
-class Cross_Entropy(Operation):
-    def forward(self, pred, target):
+class CrossEntropy(Operation):
+    '''
+    Calculate the CrossEntropy loss for a batch of training data.
+
+    '''
+    def forward(self, x, t):
         '''
-        forward function for Cross_Entropy Operation.
+        Args:
+            x (CTensor): 1d or 2d tensor, the prediction data(output)
+                of the current network.
+            t (CTensor): 1d or 2d tensor, the target data for training.
+
+        Returns:
+            loss (CTensor): scalar.
         '''
-        loss = singa.Tensor((1,))
-        loss.SetFloatValue(-singa.SumAsFloat(singa.__mul__(target, singa.Log(pred)))/pred.shape()[0])
-        self.input = (pred, target)
+        loss = CTensor((1,))
+        loss_data = -singa.SumAsFloat(singa.__mul__(t, singa.Log(x)))
+        loss.SetFloatValue(loss_data / x.shape()[0])
+        self.input = (x, t)
         return loss
 
     def backward(self, dy):
         '''
-        backward function for Cross_Entropy Operation.
+        Args:
+            dy (float or CTensor): scalar, the gradient accumulated from
+                outside of the current network, usually equal to 1.0
+
+        Returns:
+            dx (CTensor): data for the dL / dx, L is the loss, x is the
+                output of the current network. note that this is true
+                for dy = 1.0
         '''
         dx = singa.__div__(self.input[1], self.input[0])
-        dx *= float(-1/self.input[0].shape()[0])
-        if not isinstance(dy, singa.Tensor):
+        dx *= float(-1 / self.input[0].shape()[0])
+        if isinstance(dy, float):  # dtype of dy: float
            dx *= dy
-            return dx
-        else:
-            pass  # TODO
+            return dx, None
+        elif isinstance(dy, CTensor):
+            pass  # TODO, broadcast elementwise multiply seems not supported
 
 
 def cross_entropy(y, t):
-    return Cross_Entropy()(y, t)[0]
+    return CrossEntropy()(y, t)[0]
 
 
-def To_Numpy(x):
+def ctensor2numpy(x):
     '''
     To be used in SoftMax Operation. Convert a singa_tensor to numpy_tensor.


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f42d4d07/python/singa/utils.py
----------------------------------------------------------------------
diff --git a/python/singa/utils.py b/python/singa/utils.py
index c446984..97902a0 100644
--- a/python/singa/utils.py
+++ b/python/singa/utils.py
@@ -17,6 +17,7 @@
 import sys
 
+
 def update_progress(progress, info):
     """Display progress bar and user info.
@@ -38,10 +39,9 @@ def update_progress(progress, info):
         progress = 1
         status = "Done. "
     status = status + info
-    block = int(round(barLength*progress))
-    text = "[{0}] {1:3.1f}% {2}".format("."*block + " "*(barLength-block),
-                                        progress*100, status)
+    block = int(round(barLength * progress))
+    text = "[{0}] {1:3.1f}% {2}".format("." * block + " " * (barLength - block),
+                                        progress * 100, status)
     sys.stdout.write(text)
-    sys.stdout.write('\b'*(9 + barLength + len(status)))
+    sys.stdout.write('\b' * (9 + barLength + len(status)))
     sys.stdout.flush()
-
