Repository: incubator-singa
Updated Branches:
  refs/heads/master 163452e74 -> 060e7dfe1
SINGA-348 Support autograd MLP Example

Fix some bugs and rename some files, functions and variables.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/755eba69
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/755eba69
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/755eba69

Branch: refs/heads/master
Commit: 755eba69a9b010b4eb4ab5c9277fc6487998d55a
Parents: e09dff4
Author: xuewanqi <[email protected]>
Authored: Thu Apr 5 18:45:32 2018 +0800
Committer: Wang Wei <[email protected]>
Committed: Thu Apr 12 16:59:47 2018 +0800

----------------------------------------------------------------------
 examples/MLP.py        |  82 ++++++++--------
 python/singa/engine.py | 141 +++++++++++++++++++++++++++
 python/singa/tensor.py | 226 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 402 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/755eba69/examples/MLP.py
----------------------------------------------------------------------
diff --git a/examples/MLP.py b/examples/MLP.py
index 54ae1ad..405d998 100644
--- a/examples/MLP.py
+++ b/examples/MLP.py
@@ -4,14 +4,11 @@ from singa import engine
 from singa import singa_wrap as singa
 import numpy as np
 
-def print_singa_tensor(x):
-    np_array = x.GetFloatValue(int(x.Size()))
-    print(np_array.reshape(x.shape()))
-    return
 
-if __name__ =='__main__':
+if __name__ == '__main__':
+
+    # prepare training data in numpy array
 
-    #prepare numpy training data
     # generate the boundary
     f = lambda x: (5 * x + 1)
     bd_x = np.linspace(-1., 1, 200)
@@ -24,18 +21,17 @@ if __name__ =='__main__':
     data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)
 
     def to_categorical(y, num_classes=None):
-        """Converts a class vector (integers) to binary class matrix.
-
-        E.g. for use with categorical_crossentropy.
+        '''
+        Converts a class vector (integers) to binary class matrix.
 
-        # Arguments
+        Args
             y: class vector to be converted into a matrix
                 (integers from 0 to num_classes).
             num_classes: total number of classes.
 
-        # Returns
+        Return
            A binary matrix representation of the input.
-        """
+        '''
         y = np.array(y, dtype='int')
         input_shape = y.shape
         if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
@@ -50,59 +46,65 @@ if __name__ =='__main__':
         categorical = np.reshape(categorical, output_shape)
         return categorical
 
-    label=to_categorical(label,2).astype(np.float32)
-    print 'train_data_shape:',data.shape,'train_label_shape:',label.shape
+    label = to_categorical(label,2).astype(np.float32)
+    print 'train_data_shape:', data.shape, 'train_label_shape:', label.shape
 
-    # send numpy data to singa_tensor
-    tr_data=singa.Tensor((400,2))
+    # send training data(numpy array) to singa_tensor
+    tr_data = singa.Tensor((400, 2))
     tr_data.CopyFloatDataFromHostPtr(data.flatten())
-    tr_label=singa.Tensor((400,2))
+    tr_label = singa.Tensor((400, 2))
     tr_label.CopyFloatDataFromHostPtr(label.flatten())
 
-    w_0=singa.Tensor((2,3))
+    w_0 = singa.Tensor((2, 3))
     singa.Gaussian(float(0), float(0.1), w_0)
-    b_0=singa.Tensor((1,3))
+    b_0 = singa.Tensor((1, 3))
     b_0.SetFloatValue(float(0))
 
-    w_1=singa.Tensor((3,2))
+    w_1 = singa.Tensor((3, 2))
     singa.Gaussian(float(0), float(0.1), w_1)
-    b_1=singa.Tensor((1,2))
+    b_1 = singa.Tensor((1, 2))
     b_1.SetFloatValue(float(0))
 
+    # initialize tensor.Tensor using singa_tensor
+    inputs = tensor.Tensor(data=tr_data, requires_grad=False, grad_outlet=False)
+    target = tensor.Tensor(data=tr_label, requires_grad=False, grad_outlet=False)
 
-    # initialize Tensor using singa_tensor
-    inputs=tensor.Tensor(data=tr_data,requires_grad=False,grad_outlet=False)
-    target=tensor.Tensor(data=tr_label,requires_grad=False,grad_outlet=False)
-
-    weight_0=tensor.Tensor(data=w_0,requires_grad=True,grad_outlet=True)
-    bias_0=tensor.Tensor(data=b_0,requires_grad=True,grad_outlet=True)
+    weight_0 = tensor.Tensor(data=w_0, requires_grad=True, grad_outlet=True)
+    bias_0 = tensor.Tensor(data=b_0, requires_grad=True, grad_outlet=True)
 
-    weight_1=tensor.Tensor(data=w_1,requires_grad=True,grad_outlet=True)
-    bias_1=tensor.Tensor(data=b_1,requires_grad=True,grad_outlet=True)
+    weight_1 = tensor.Tensor(data=w_1, requires_grad=True, grad_outlet=True)
+    bias_1 = tensor.Tensor(data=b_1, requires_grad=True, grad_outlet=True)
 
-    def update(lr,param,grad): #param:Tensor grad:singa_tensor
+    def update(lr, param, grad):
+        '''
+        To update the value of parameters
+        Args:
+            param: tensor.Tensor
+            grad: singa_tensor
+        '''
         grad *= float(lr)
         assert param.singa_tensor.shape() == grad.shape()
-        param.singa_tensor = singa.__sub__(param.singa_tensor,grad)
+        param.singa_tensor = singa.__sub__(param.singa_tensor, grad)
         return
 
-    lr=0.05
+    # training process
+    lr = 0.05
     for i in range(1001):
-        outputs=tensor.dot(inputs,weight_0)
-        outputs=tensor.add_bias(bias_0,outputs)
-        outputs=tensor.relu(outputs)
+        outputs = tensor.dot(inputs, weight_0)
+        outputs = tensor.add_bias(bias_0, outputs)
+        outputs = tensor.relu(outputs)
         outputs = tensor.dot(outputs, weight_1)
         outputs = tensor.add_bias(bias_1, outputs)
-        outputs=tensor.softmax(outputs)
+        outputs = tensor.softmax(outputs)
 
-        loss=tensor.cross_entropy(outputs,target)
+        loss = tensor.cross_entropy(outputs, target)
 
-        grads=float(1)
+        grads = float(1)
         in_grads = engine.gradients(loss, grads)
 
         for param in in_grads:
-            update(lr,param,in_grads[param])
+            update(lr, param, in_grads[param])
 
         if (i % 100 == 0):
-            print 'training loss = ' ,float(tensor.To_numpy(loss.singa_tensor))
\ No newline at end of file
+            print 'training loss = ', float(tensor.To_Numpy(loss.singa_tensor))
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/755eba69/python/singa/engine.py
----------------------------------------------------------------------
diff --git a/python/singa/engine.py b/python/singa/engine.py
new file mode 100644
index 0000000..a326ab4
--- /dev/null
+++ b/python/singa/engine.py
@@ -0,0 +1,141 @@
+from collections import Counter
+from singa import singa_wrap as singa
+from singa import tensor
+
+
+class GradientFlowController(object):
+    '''
+    Control the backward gradient flow by running the method run_backward()
+
+    '''
+    def __init__(self):
+        pass
+
+    def dependency_check(self, function):
+        '''
+        Compute how many times each 'previous_function' (Operation object) influences its
+
+        next_functions, and through which outputs
+
+        Args:
+            function: an Operation object which is the termination, i.e. where backward starts
+
+        Return:
+            dependencies: a dictionary recording dependencies among functions (Operations)
+            seen: a set recording all functions (Operations) observed
+
+        '''
+        dependencies = {}
+        seen = {function}
+        queue = [function]
+        while len(queue) > 0:
+            f = queue.pop()
+            for previous_function, Arg_ID in f.previous_functions:
+                if previous_function not in dependencies:
+                    dependencies[previous_function] = [Counter() for _ in previous_function.output_ids]
+                output_idx = previous_function.output_ids[Arg_ID]
+                dependencies[previous_function][output_idx][f] += 1
+                if previous_function not in seen:
+                    queue.append(previous_function)
+                    seen.add(previous_function)
+        return dependencies, seen
+
+    def dependency_release(self, dependencies, previous_function, function, Arg_ID):
+        '''
+        Release a dependency: if previous_function receives one gradient through its
+
+        output (located by Arg_ID) from function, the corresponding dependency counter
+
+        is decreased by one.
+
+        '''
+        deps = dependencies[previous_function]
+        output_idx = previous_function.output_ids[Arg_ID]
+        output_deps = deps[output_idx]
+        output_deps[function] -= 1
+        if output_deps[function] == 0:
+            del output_deps[function]
+        return output_idx
+
+    def is_ready_for_backward(self, dependencies, function):
+        '''
+        Check if a function (Operation) is ready for backward.
+
+        Return: True or False
+
+        '''
+        for deps in dependencies[function]:
+            if len(deps) > 0:
+                return False
+        return True
+
+    def run_backward(self, Tensor, grad):
+        '''
+        Run the autograd process.
+
+        Args:
+            Tensor: the object tensor to optimize, usually the loss
+            grad: received gradients
+
+        Return:
+            gradients: a dictionary recording the gradients
+
+        '''
+        ready = [(Tensor.creator, (grad,))]
+        not_ready = {}
+
+        dependencies, seen = self.dependency_check(Tensor.creator)
+
+        while len(ready) > 0:
+            function, grad = ready.pop()
+            gradient_inputs = function._do_backward(*grad)
+            for (previous_function, Arg_ID), gradient_input in zip(function.previous_functions, gradient_inputs):
+                if not previous_function.requires_grad:
+                    continue
+
+                output_index = self.dependency_release(dependencies, previous_function, function, Arg_ID)
+                is_ready = self.is_ready_for_backward(dependencies, previous_function)
+
+                if is_ready:
+                    if previous_function in not_ready:
+                        previous_functions_gradients = not_ready[previous_function]
+                        if not previous_functions_gradients[output_index]:
+                            previous_functions_gradients[output_index] = gradient_input
+                        else:
+                            previous_functions_gradients[output_index] = \
+                                singa.__add__(previous_functions_gradients[output_index], gradient_input)
+                        del not_ready[previous_function]
+                    else:
+                        assert output_index == 0
+                        previous_functions_gradients = (gradient_input,)
+                    ready.append((previous_function, previous_functions_gradients))
+                else:
+                    if previous_function in not_ready:
+                        previous_functions_gradients = not_ready[previous_function]
+                    else:
+                        previous_functions_gradients = [None for _ in previous_function.output_ids]
+
+                    if not previous_functions_gradients[output_index]:
+                        previous_functions_gradients[output_index] = gradient_input
+                    else:
+                        previous_functions_gradients[output_index] = \
+                            singa.__add__(previous_functions_gradients[output_index], gradient_input)
+
+                    not_ready[previous_function] = previous_functions_gradients
+
+        gradients = {}
+        for f in seen:
+            if isinstance(f, tensor.Initializer):
+                if f.Tensor.grad_outlet is True:
+                    gradients[f.Tensor] = f.grads
+                    f.grads = f.init.Clone()
+        return gradients
+
+
+def gradients(Tensor, out_gradient):
+    '''
+    Compute gradients of Tensor.
+
+    '''
+    Controller = GradientFlowController()
+    return Controller.run_backward(Tensor, out_gradient)


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/755eba69/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 2fcadb4..df29cf5 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -69,6 +69,7 @@ int32 = core_pb2.kInt
 float32 = core_pb2.kFloat32
 
 
+
 class Tensor(object):
     '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor
 
@@ -80,13 +81,17 @@ class Tensor(object):
         device: a swig converted Device instance using the device module.
             If it is None, then the default host device would be used.
         dtype: data type. currently, most operations only accept kFloat32.
-    '''
+        data: a singa_tensor recording input data.
+        creator: an Operation object which generates this tensor.
+        requires_grad: a bool recording if the creator of the tensor requires gradient.
+        grad_outlet: a bool recording if the tensor is an outlet for gradients.
 
-    def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
+    '''
+    def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32, data=None, creator=None, requires_grad=True,
+                 grad_outlet=False):
         if shape is None:
             # call constructor of singa::Tensor
             self.singa_tensor = singa.Tensor()
-            return
         else:
             assert isinstance(shape, tuple), 'shape should be tuple'
             if device is None:
@@ -94,9 +99,17 @@ class Tensor(object):
                 self.singa_tensor = singa.Tensor(list(shape), device, dtype)
             else:
                 self.singa_tensor = singa.Tensor(list(shape), device, dtype)
-            self.shape = shape
-            self.dtype = dtype
-            self.device = device
+        if data is not None:
+            self.singa_tensor = data
+        if creator is None:
+            creator = Initializer(self, requires_grad)
+
+        self.shape = tuple(self.singa_tensor.shape())
+        self.device = self.singa_tensor.device()
+        self.dtype = self.singa_tensor.data_type()
+
+        self.creator = creator
+        self.grad_outlet = grad_outlet
 
     def ndim(self):
         '''
@@ -384,7 +397,7 @@ class Tensor(object):
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
-            self.__imul__(1/float(x))
+            self.singa_tensor /= float(x)
         return self
 
     '''
@@ -1102,3 +1115,202 @@ def _call_singa_func(_singa_func, *args):
     new_t.device = new_t.singa_tensor.device()
     new_t.dtype = new_t.singa_tensor.data_type()
     return new_t
+
+
+def copy_from_numpy(singa_tensor, np_array):
+    '''
+    Copy the data from the numpy array.
+    '''
+    assert np_array.size == singa_tensor.Size(), 'tensor shape should be the same'
+    if not np_array.ndim == 1:
+        np_array = np_array.flatten()
+    dt = np_array.dtype
+    if dt == np.float32:
+        singa_tensor.CopyFloatDataFromHostPtr(np_array)
+    elif dt == np.int or dt == np.int32:
+        singa_tensor.CopyIntDataFromHostPtr(np_array)
+    else:
+        print('Not implemented yet for ', dt)
+
+
+class Operation(object):
+    '''
+    Wrap normal functions such as dot to realize autograd.
+
+    '''
+    def __init__(self, **operation_params):
+        pass
+
+    def __call__(self, *input):
+        return self._do_forward(*input)
+
+    def _do_forward(self, *input):
+        unpacked_input = tuple(arg.singa_tensor for arg in input)
+        raw_output = self.forward(*unpacked_input)
+        if not isinstance(raw_output, tuple):
+            raw_output = (raw_output,)
+        self.needs_input_grad = tuple(arg.creator.requires_grad for arg in input)
+        self.requires_grad = any(self.needs_input_grad)
+        output = tuple(Tensor(data=data, creator=self) for data in raw_output)
+        self.previous_functions = [(arg.creator, id(arg)) for arg in input]
+        self.output_ids = {id(var): i for i, var in enumerate(output)}
+        return output
+
+    def _do_backward(self, grad_output):
+        grad_input = self.backward(grad_output)
+        if not isinstance(grad_input, tuple):
+            grad_input = (grad_input,)
+        return grad_input
+
+    def forward(self, *input):
+        raise NotImplementedError
+
+    def backward(self, *grad_output):
+        raise NotImplementedError
+
+
+class Initializer(Operation):
+    '''
+    For a Tensor without a creator, Initializer can act as its creator.
+    It is commonly used for feeding training data or initializing parameters like weights and biases.
+
+    '''
+    def __init__(self, Tensor, requires_grad):
+        self.Tensor = Tensor
+        self.output_ids = {id(Tensor): 0}
+        self.previous_functions = []
+        self.requires_grad = requires_grad
+        shape = self.Tensor.singa_tensor.shape()
+        self.init = singa.Tensor(list(shape))
+        copy_from_numpy(self.init, np.zeros(shape=shape, dtype=np.float32))
+        self.grads = self.init.Clone()
+
+    def _do_forward(self):
+        raise NotImplementedError
+
+    def _do_backward(self, *dy):
+        assert len(dy) == 1
+        self.grads = singa.__add__(self.grads, dy[0])
+        return tuple()
+
+
+class ReLU(Operation):
+    def forward(self, x):
+        '''
+        forward function for ReLU Operation.
+
+        '''
+        self.input = (x,)
+        return singa.ReLU(x)
+
+    def backward(self, dy):
+        '''
+        backward function for ReLU Operation.
+        '''
+        dx = singa.GTFloat(self.input[0], 0.0)
+        return singa.__mul__(dy, dx)
+def relu(x):
+    return ReLU()(x)[0]
+
+
+class Dot(Operation):
+    def forward(self, x, w):
+        '''
+        forward function for Dot Operation.
+
+        '''
+        self.input = (x, w)
+        return singa.Mult(x, w)
+
+    def backward(self, dy):
+        '''
+        backward function for Dot Operation.
+
+        '''
+        return singa.Mult(dy, self.input[1].T()), singa.Mult(self.input[0].T(), dy)
+def dot(x, w):
+    return Dot()(x, w)[0]
+
+
+class Add_Bias(Operation):
+    def forward(self, b, x):
+        '''
+        forward function for Add_Bias Operation.
+
+        '''
+        singa.AddRow(b, x)
+        return x
+
+    def backward(self, dy):
+        '''
+        backward function for Add_Bias Operation.
+
+        '''
+        return singa.Sum(dy, 0), dy
+def add_bias(b, x):
+    return Add_Bias()(b, x)[0]
+
+
+class SoftMax(Operation):
+    def forward(self, x):
+        '''
+        forward function for SoftMax Operation.
+
+        '''
+        self.output = (singa.SoftMax(x),)
+        return self.output[0]
+
+    def backward(self, dy):
+        '''
+        backward function for SoftMax Operation.
+
+        '''
+        # calculations are made on numpy
+        grad = To_Numpy(dy)
+        output = To_Numpy(self.output[0])
+        out_1 = np.einsum('ki,ki->ki', grad, output)
+        medium_out = np.einsum('ki,kj->kij', output, output)
+        out_2 = np.einsum('kij,kj->ki', medium_out, grad)
+        out = out_1 - out_2
+        out_singa = singa.Tensor(out_1.shape)
+        out_singa.CopyFloatDataFromHostPtr(out.flatten())
+        return out_singa
+def softmax(x):
+    return SoftMax()(x)[0]
+
+
+class Cross_Entropy(Operation):
+    def forward(self, pred, target):
+        '''
+        forward function for Cross_Entropy Operation.
+
+        '''
+        loss = singa.Tensor((1,))
+        loss.SetFloatValue(-singa.SumAsFloat(singa.__mul__(target, singa.Log(pred)))/pred.shape()[0])
+        self.input = (pred, target)
+        return loss
+
+    def backward(self, dy):
+        '''
+        backward function for Cross_Entropy Operation.
+
+        '''
+        dx = singa.__div__(self.input[1], self.input[0])
+        dx *= float(-1/self.input[0].shape()[0])
+        if not isinstance(dy, singa.Tensor):
+            # dtype of dy: float
+            dx *= dy
+            return dx
+        else:
+            pass  # TODO
+def cross_entropy(y, t):
+    return Cross_Entropy()(y, t)[0]
+
+
+def To_Numpy(x):
+    '''
+    To be used in SoftMax Operation.
+    Convert a singa_tensor to numpy_tensor.
+    '''
+    np_array = x.GetFloatValue(int(x.Size()))
+    return np_array.reshape(x.shape())
\ No newline at end of file
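
Taken together, the new pieces form a small define-by-run autograd: Operation._do_forward() records creator links on each tensor.Tensor, and engine.gradients() walks those links backwards. A minimal usage sketch along the lines of examples/MLP.py (illustrative only, not part of this commit; it assumes the modules above are importable and seeds the backward pass with a tensor of ones):

from singa import tensor
from singa import engine
from singa import singa_wrap as singa
import numpy as np

# wrap raw singa tensors into autograd-aware tensor.Tensor objects
x = singa.Tensor((4, 2))
x.CopyFloatDataFromHostPtr(np.random.rand(4, 2).astype(np.float32).flatten())
w = singa.Tensor((2, 3))
singa.Gaussian(float(0), float(0.1), w)

inputs = tensor.Tensor(data=x, requires_grad=False, grad_outlet=False)
weight = tensor.Tensor(data=w, requires_grad=True, grad_outlet=True)

# forward pass builds the graph: y = relu(dot(inputs, weight))
y = tensor.relu(tensor.dot(inputs, weight))

# backward pass: seed with ones, i.e. the gradient of sum(y) w.r.t. y
seed = singa.Tensor(list(y.shape))
seed.SetFloatValue(float(1))
grads = engine.gradients(y, seed)

# grads maps each grad_outlet Tensor (here: weight) to its singa gradient tensor
print(tensor.To_Numpy(grads[weight]))

Unlike examples/MLP.py, which seeds engine.gradients() with float(1) because Cross_Entropy.backward() accepts a plain float, ReLU.backward() multiplies dy element-wise, so the seed here has to be a full tensor with the output's shape.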
