http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/python/singa/net.py ---------------------------------------------------------------------- diff --git a/python/singa/net.py b/python/singa/net.py new file mode 100644 index 0000000..0026953 --- /dev/null +++ b/python/singa/net.py @@ -0,0 +1,213 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +""" +Nerual net class for constructing the nets using layers and providing access +functions for net info, e.g., parameters. +""" + + +from .proto.model_pb2 import kTrain, kEval +import tensor +import layer +import cPickle as pickle + + +class FeedForwardNet(object): + + def __init__(self, loss=None, metric=None): + self.loss = loss + self.metric = metric + self.layers = [] + self.src_of_layer = {} + self.dst_of_layer = None + self.ordered_layers = None + + def to_device(self, dev): + for lyr in self.layers: + lyr.to_device(dev) + + def add(self, lyr, src=None): + """Append a layer into the layer list. + + This function will get the sample shape from the last layer to setup + the newly added layer. For the first layer, it is setup outside. + The calling function should ensure the correctness of the layer order. 
+ + Args: + lyr (Layer): the layer to be added + """ + if src is not None: + if isinstance(src, layer.Layer): + assert src.has_setup is True, 'the source layer must be set up' + self.src_of_layer[lyr.name] = [src] + else: + assert type(src) == list, 'the src must be a list of layers' + self.src_of_layer[lyr.name] = src + # print 'merge------', len(src) + else: + assert len(self.layers) > 0 or lyr.has_setup, \ + 'Source layers are needed to set up this layer' + if len(self.layers) > 0: + self.src_of_layer[lyr.name] = [self.layers[-1]] + else: + self.src_of_layer[lyr.name] = [] + if lyr.has_setup is False: + # print shape + in_shape = self.src_of_layer[lyr.name][0].get_output_sample_shape() + lyr.setup(in_shape) + print lyr.name, lyr.get_output_sample_shape() + self.layers.append(lyr) + return lyr + + def param_values(self): + values = [] + layers = self.layers + if self.ordered_layers is not None: + layers = self.ordered_layers + for lyr in layers: + values.extend(lyr.param_values()) + return values + + def param_specs(self): + specs = [] + layers = self.layers + if self.ordered_layers is not None: + layers = self.ordered_layers + for lyr in layers: + specs.extend(lyr.param_specs) + return specs + + def param_names(self): + return [spec.name for spec in self.param_specs()] + + def train(self, x, y): + out = self.forward(kTrain, x) + l = self.loss.forward(kTrain, out, y) + if self.metric is not None: + m = self.metric.evaluate(out, y) + return self.backward(), (l.l1(), m) + + def evaluate(self, x, y): + """Evaluate the loss and metric of the given data""" + out = self.forward(kEval, x) + l = None + m = None + assert self.loss is not None or self.metric is not None,\ + 'Cannot do evaluation, as neither loss nor metic is set' + if self.loss is not None: + l = self.loss.evaluate(kEval, out, y) + if self.metric is not None: + m = self.metric.evaluate(out, y) + return l, m + + def predict(self, x): + xx = self.forward(kEval, x) + return tensor.softmax(xx) + + def topo_sort(self, cur, src_of_layer, visited=None, order=None): + if visited is None: + visited = {} + for name in src_of_layer.keys(): + visited[name] = False + order = [] + srcs = src_of_layer[cur.name] + for src in srcs: + if visited[src.name] is False: + visited[src.name] = True + self.topo_sort(src, src_of_layer, visited, order) + order.append(cur) + visited[cur.name] = True + return order + + def forward(self, flag, x): + # print x.l1() + if self.ordered_layers is None: + self.ordered_layers = self.topo_sort(self.layers[-1], + self.src_of_layer) + inputs = [x] + output_of_layer = {} + for cur in self.ordered_layers: + srcs = self.src_of_layer[cur.name] + disp_src = cur.name + '<--' + for src in srcs: + outs = output_of_layer[src.name] + if type(outs) == list: + inputs.append(outs[0]) + else: + inputs.append(outs) + disp_src += '+' + src.name + # del output_of_layer[src.name] + # print disp_src + if len(inputs) == 1: + inputs = inputs[0] + output_of_layer[cur.name] = cur.forward(flag, inputs) + inputs = [] + # print lyr.name, x.l1() + # print output_of_layer + return output_of_layer[self.ordered_layers[-1].name] + + def backward(self): + if self.dst_of_layer is None: + self.dst_of_layer = {} + for cur in self.layers: + self.dst_of_layer[cur.name] = [] + for cur in self.ordered_layers[1:]: + srcs = self.src_of_layer[cur.name] + for src in srcs: + self.dst_of_layer[src.name].append(cur) + grad = self.loss.backward() + if len(grad.shape) > 1: + grad /= grad.shape[0] # average across the batch + # print 'grad', grad.l1() + grads = [grad] 
+ output_of_layer = {} + pgrads = [] + for cur in reversed(self.ordered_layers): + for dst in self.dst_of_layer[cur.name]: + outputs = output_of_layer[dst.name] + if type(outputs) == list: + grads.append(outputs[0]) + else: + grads.append(outputs) + # del output_of_layer[dst.name] + if len(grads) == 1: + grads = grads[0] + outs, _pgrads = cur.backward(kTrain, grads) + pgrads.append(_pgrads) + output_of_layer[cur.name] = outs + grads = [] + + ret = [] + for pgrad in reversed(pgrads): + ret.extend(pgrad) + return ret + + def save(self, f): + """Save model parameters using cpickle""" + params = {} + for (specs, val) in zip(self.param_specs(), self.param_values()): + val.to_host() + params[specs.name] = tensor.to_numpy(val) + with open(f, 'wb') as fd: + pickle.dump(params, fd) + + def load(self, f): + """Load model parameters using cpickle""" + with open(f, 'rb') as fd: + params = pickle.load(fd) + for (specs, val) in zip(self.param_specs(), self.param_values()): + val.copy_from_numpy(params[specs.name])
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/python/singa/optimizer.py ---------------------------------------------------------------------- diff --git a/python/singa/optimizer.py b/python/singa/optimizer.py new file mode 100644 index 0000000..00380e0 --- /dev/null +++ b/python/singa/optimizer.py @@ -0,0 +1,377 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +'''This module includes a set of optimizers for updating model parameters. + +Example usage:: + + from singa import optimizer + from singa import tensor + + sgd = optimizer.SGD(lr=0.01, momentum=0.9, weight_decay=1e-4) + p = tensor.Tensor((3,5)) + p.uniform(-1, 1) + g = tensor.Tensor((3,5)) + g.gaussian(0, 0.01) + + sgd.apply(1, g, p, 'param') # use the global lr=0.1 for epoch 1 + sgd.apply_with_lr(2, 0.03, g, p, 'param') # use lr=0.03 for epoch 2 +''' + +from . import singa_wrap as singa +import tensor +from proto import model_pb2 + + +class Optimizer(object): + '''The base python optimizer class. + + Typically, an optimizer is used as follows: + + 1. construct the optimizer + 2. (optional) register each parameter with its specs. + 3. use the optimizer to update parameter values given parameter gradients + and other optional info + + The subclasses should override the apply_with_lr function to do the real + parameter udpate. + + Args: + lr (float): a constant for the learning rate, mutually exclusive with + 'lr_gen'. + momentum (float): a constant for the momentum value + weight_decay (float): the coefficent for L2 regularizer, which is + mutually exclusive with 'regularizer'. + lr_gen (function): a function returns the learning rate given + the current training step/epoch. It is mutually exclusive with lr. + If both are not set, the apply_with_lr function should be used for + param updating. + regularizer: an instance of Regularizer or RegularizerConf; If set, + regularization would be applied in apply_with_lr(). + Users can also do regularization outside. + constraint: an instance of Constraint or ConstraintConf; If set, + constraint would be applied inside apply_with_lr(). Users can + also do regularization outside. 
+ ''' + + def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None, + regularizer=None, constraint=None): + if lr is not None: + assert lr_gen is None, 'Cannot set lr and lr_gen at the same time' + + def lr_gen(epoch): + return lr + self.lr_gen = lr_gen + self.momentum = momentum + if weight_decay is not None: + assert regularizer is None, \ + 'Cannot set weight_decay and regularizer at the same time' + regularizer = L2Regularizer(weight_decay) + if regularizer is not None: + if isinstance(regularizer, model_pb2.RegularizerConf): + self.regularizer = CppRegularizer(regularizer) + else: + self.regularizer = regularizer + else: + self.regularizer = None + if constraint is not None: + if isinstance(constraint, model_pb2.ConstraintConf): + self.constraint = CppConstraint(constraint) + else: + self.constraint = constraint + else: + self.constraint = None + self.regularizers = {} + self.constraints = {} + self.decay_multiplier = {} + self.learning_rate_multiplier = {} + + def register(self, name, specs): + '''Register the param specs, including creating regularizer and + constraint per param object. Param specific regularizer and constraint + have higher priority than the global ones. + + Args: + name (str): parameter name + specs (ParamSpec): protobuf obj, including regularizer and + constraint, multipliers for learning rate and weight decay. + ''' + assert isinstance(specs, model_pb2.ParamSpec), \ + 'specs should be model_pb2.ParamSpec instance' + if specs.HasField('regularizer'): + self.regularizers[name] = CppRegularizer(specs.regularizer) + elif specs.decay_mult != 1: + self.regularizers[name] = L2Regularizer( + specs.decay_mult * self.regularizer.coefficient) + + if specs.HasField('constraint'): + self.constraints[name] = CppConstraint(specs.constraint) + + if specs.lr_mult != 1: + self.learning_rate_multiplier[name] = specs.lr_mult + + def apply_regularizer_constraint(self, epoch, value, grad, name=None): + '''Apply regularization and constraint if available. + + If there are both global regularizer (constraint) and param specific + regularizer (constraint), it would use the param specific one. + + Args: + value (Tensor): parameter value Tensor + grad (Tensor): parameter gradient Tensor + name (string): to get parameter specific regularizer or constraint + epoch (int): some regularizer or constraint would use epoch + + Returns: + the updated gradient Tensor + ''' + if name is not None and name in self.constraints: + self.constraints[name].apply(epoch, value, grad) + elif self.constraint is not None: + self.constraint.apply(epoch, value, grad) + + if name is not None and name in self.regularizers: + self.regularizers[name].apply(epoch, value, grad) + elif self.regularizer is not None: + self.regularizer.apply(epoch, value, grad) + return grad + + def apply_with_lr(self, epoch, lr, grad, value, name=None): + '''Do update with given learning rate. + + The subclass optimizer must override this function. + + Args: + epoch (int): training epoch (could be iteration or epoch) + lr (float): learning rate + grad (Tensor): parameter gradient + value (Tesnor): parameter value + name (string): paramter name to retrieval parameter specific + updating rules (including regularizer and constraint) + + Returns: + updated parameter value + ''' + assert False, 'This is the base function, pls call the subclass func' + return value + + def apply(self, epoch, grad, value, name=None): + '''Do update assuming the learning rate generator is set. 
+ + The subclass optimizer does not need to override this function. + + Args: + epoch (int): training epoch (could be iteration or epoch) + grad (Tensor): parameter gradient + value (Tesnor): parameter value + name (string): paramter name to retrieval parameter specific + updating rules (including regularizer and constraint) + + Return: + updated parameter value + ''' + assert self.lr_gen is not None, 'Learning rate generator is not set.'\ + 'Either set the lr_gen in constructor or call apply_with_lr' + lr = self.lr_gen(epoch) + return self.apply_with_lr(epoch, lr, grad, value, name) + + +class SGD(Optimizer): + '''The vallina Stochasitc Gradient Descent algorithm with momentum. + + See the base Optimizer for all arguments. + ''' + + def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None, + regularizer=None, constraint=None): + super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen, + regularizer, constraint) + conf = model_pb2.OptimizerConf() + if self.momentum is not None: + conf.momentum = self.momentum + conf.type = 'sgd' + self.opt = singa.CreateOptimizer('SGD') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, epoch, lr, grad, value, name): + self.apply_regularizer_constraint(epoch, value, grad, name) + if name is not None and name in self.learning_rate_multiplier: + lr = lr * self.learning_rate_multiplier[name] + self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class Nesterov(Optimizer): + '''The SGD with Nesterov momentum. + + See the base Optimizer for all arguments. + ''' + + def __init__(self, lr=None, momentum=0.9, weight_decay=None, lr_gen=None, + regularizer=None, constraint=None): + super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen, + regularizer, constraint) + conf = model_pb2.OptimizerConf() + if self.momentum is not None: + conf.momentum = momentum + conf.type = 'nesterov' + self.opt = singa.CreateOptimizer('Nesterov') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, epoch, lr, grad, value, name): + self.apply_regularizer_constraint(epoch, value, grad, name) + if name is not None and name in self.learning_rate_multiplier: + lr = lr * self.learning_rate_multiplier[name] + self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class AdaGrad(Optimizer): + '''AdaGrad optimizer. + + See the base Optimizer for all constructor args. + + Args: + epsilon (float): small number for preventing numeric error. + ''' + + def __init__(self, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None, + regularizer=None, constraint=None): + super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer, + constraint) + conf = model_pb2.OptimizerConf() + conf.delta = epsilon + conf.type = 'adagrad' + self.opt = singa.CreateOptimizer('AdaGrad') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, epoch, lr, grad, value, name): + grad = self.apply_regularizer_constraint(epoch, value, grad, name) + if name is not None and name in self.learning_rate_multiplier: + lr = lr * self.learning_rate_multiplier[name] + self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class RMSProp(Optimizer): + '''RMSProp optimizer. + + See the base Optimizer for all constructor args. 
+ + Args: + rho (float): float within [0, 1] + epsilon (float): small value for preventing numeric error + ''' + + def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None, + lr_gen=None, regularizer=None, constraint=None): + super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer, + constraint) + conf = model_pb2.OptimizerConf() + conf.rho = rho + conf.delta = epsilon + self.opt = singa.CreateOptimizer('RMSProp') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, epoch, lr, grad, value, name): + grad = self.apply_regularizer_constraint(epoch, value, grad, name) + if name is not None and name in self.learning_rate_multiplier: + lr = lr * self.learning_rate_multiplier[name] + self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class Regularizer(object): + '''Base Python regularizer for parameter gradients.''' + + def apply(self, value, grad): + assert False, 'Not Implemented. Call the subclass function.' + return grad + + +class CppRegularizer(Regularizer): + '''Wrapper for regularizer implemented using C++. + + Args: + conf (RegularizerConf): protobuf message for the configuration. + ''' + + def __init__(self, conf): + self.reg = singa.CreateRegularizer(conf.type) + self.reg.Setup(conf.SerializeToString()) + + def apply(self, epoch, value, grad): + self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor) + return grad + + +class L2Regularizer(Regularizer): + '''L2 regularization + + Args: + coefficient (float): regularization coefficient. + ''' + + def __init__(self, coefficient): + self.coefficient = coefficient + + def apply(self, epoch, value, grad, coefficient=None): + if coefficient is None: + assert self.coefficient is not None, 'Must set the coefficient' + coefficient = self.coefficient + # print coefficient, value.l1(), grad.l1() + if coefficient != 0: + tensor.axpy(coefficient, value, grad) + return grad + + +class Constraint(object): + '''Base Python constraint class for paramter gradients''' + + def apply(self, epoch, value, grad): + return grad + + +class CppConstraint(Constraint): + '''Wrapper for constraints implemented using C++. + + Args: + conf (ConstraintConf): protobuf message for the configuration. + ''' + + def __init__(self, conf): + self.constraint = singa.CreateConstraint(conf.type) + self.constraint.Setup(conf.SerializeToString()) + + def apply(self, epoch, value, grad): + self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor) + return grad + + +class L2Constraint(Constraint): + '''Rescale the gradient to make the L2 norm <= a given threshold''' + + def __init__(self, threshold=None): + self.threshold = threshold + + def apply(self, epoch, value, grad, threshold=None): + if threshold is None: + assert self.threshold is not None, 'Must set the threshold' + threshold = self.threshold + nrm = grad.l2() + grad *= threshold / nrm + return grad http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/python/singa/tensor.py ---------------------------------------------------------------------- diff --git a/python/singa/tensor.py b/python/singa/tensor.py new file mode 100644 index 0000000..f6bca43 --- /dev/null +++ b/python/singa/tensor.py @@ -0,0 +1,1011 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +""" +Example usage:: + + from singa import tensor + from singa import device + + # create a tensor with shape (2,3), default CppCPU device and float32 + x = tensor.Tensor((2,3)) + x.set_value(0.4) + + # create a tensor from a numpy array + y = tensor.from_numpy((3,3), dtype=np.float32) + y.uniform(-1, 1) + + z = mult(x, y) # gemm -> z of shape (2, 3) + + x += z # element-wise addition + + dev = device.create_cuda_gpu() + x.to_device(dev) # move the data to a gpu device + + r = relu(x) + + r.to_host() # move the data back to host cpu + s = r.to_numpy() # tensor -> numpy array, r must be on cpu + + +There are two set of tensor functions, + +Tensor member functions + which would change the internal state of the Tensor instance. +Tensor module functions + which accept Tensor instances as arguments and return Tensor instances. + +Every Tesor instance must be initialized before reading data from it. +""" + +import numpy as np +from functools import reduce +from .proto import core_pb2 +from . import singa_wrap as singa +import device as pydevice + + +class Tensor(object): + '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor + + The three arguments are three attributes of the Tensor. + + Args: + shape (list<int>): a list of integers for the tensor shape. If shape is + not specified, the created tensor is called a dummy tensor. + device: a swig converted Device instance using the device moduel . If it + is None, then the default host device would be used. + dtype: data type. currently, most operations only accept kFloat32. + ''' + + def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32): + if shape is None: + # call constructor of singa::Tensor + self.singa_tensor = singa.Tensor() + return + else: + assert isinstance(shape, tuple), 'shape should be tuple' + if device is None: + device = pydevice.get_default_device() + self.singa_tensor = singa.Tensor(list(shape), device, dtype) + else: + self.singa_tensor = singa.Tensor(list(shape), device, dtype) + self.shape = shape + self.dtype = dtype + self.device = device + + def ndim(self): + ''' + Returns: + the number of dimensions of the tensor. + ''' + return self.singa_tensor.nDim() + + def is_transpose(self): + ''' + Returns: + True if the internal data is transposed; otherwise False. + ''' + return self.singa_tensor.transpose() + + def size(self): # TODO(wangwei) compute size + ''' + Returns: + the number of elements of the tensor. + ''' + return self.singa_tensor.Size() + + def memsize(self): + ''' + Returns: + the number of Bytes allocated for this tensor. + ''' + return self.singa_tensor.MemSize() + + def reshape(self, shape): + '''Change the tensor shape. + + Args: + shape (list<int>): new shape, which should have the same volumn as + the original shape. 
+ ''' + assert product(self.shape) == product(shape), \ + 'product of shape should be equal' + self.shape = shape + self.singa_tensor.Reshape(list(shape)) + + def reset_like(self, t): + '''Reset the shape, dtype and device as the given tensor. + + Args: + t (Tensor) + ''' + self.singa_tensor.ResetLike(t.singa_tensor) + self.shape = t.shape + self.device = t.device + self.dtype = t.dtype + + ''' + def as_type(self, dtype): + Change the data type. + + Args: + dtype: + self.singa_tensor.AsType(dtype) + ''' + + def to_device(self, device): + '''Move the tensor data onto a given device. + + Args: + device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU + ''' + self.singa_tensor.ToDevice(device) + self.device = device + + def to_host(self): + '''Move the tensor data onto the default host CppCPU device. + ''' + self.singa_tensor.ToHost() + self.device = pydevice.default_device + + def l2(self): + ''' + Returns: + the L2 norm. + ''' + return self.singa_tensor.L2() + + def l1(self): + ''' + Returns: + the L1 norm. + ''' + return self.singa_tensor.L1() + + def set_value(self, x): + '''Set all elements of the tensor to be the give value. + + Args: + x (float), a float value to be set to all elements. + ''' + # assert type(x) == float, 'set value only accepts float input' + # if isinstance(x, float): + self.singa_tensor.floatSetValue(x) + + def copy_from_numpy(self, np_array, offset=0): + ''' Copy the data from the numpy array. + + Args: + np_array: source numpy array + offset (int): destination offset + ''' + assert np_array.size == self.size(), 'tensor shape should be the same' + if not np_array.ndim == 1: + np_array = np_array.flatten() + dt = np_array.dtype + if dt == np.float32: + self.singa_tensor.floatCopyDataFromHostPtr(np_array) + elif dt == np.int or dt == np.int32: + self.singa_tensor.intCopyDataFromHostPtr(np_array) + else: + print 'Not implemented yet for ', dt + + def copy_data(self, t): + '''Copy data from other Tensor instance. + + Args: + t (Tensor): source Tensor. + ''' + assert isinstance(t, Tensor), 't must be a singa Tensor instance' + self.singa_tensor.CopyData(t.singa_tensor) + + def clone(self): + ''' + Returns: + a new Tensor which does deep copy of this tensor + ''' + return _call_singa_func(self.singa_tensor.Clone) + + def T(self): + ''' shallow copy, negate the transpose field. + + Returns: + a new Tensor which shares the underlying data memory (shallow copy) + but is marked as a transposed version of this tensor. + ''' + return _call_singa_func(self.singa_tensor.T) + + def copy(self): + '''shallow copy calls copy constructor of singa::Tensor + ''' + return _call_singa_func(singa.Tensor, self.singa_tensor) + + def deepcopy(self): + '''Same as clone(). + + Returns: + a new Tensor + ''' + return self.clone() + + def bernoulli(self, p): + '''Sample 0/1 for each element according to the given probability. + + Args: + p (float): with probability p, each element is sample to 1. + ''' + singa.floatBernoulli(float(p), self.singa_tensor) + + def gaussian(self, mean, std): + '''Generate a value for each element following a Gaussian distribution. + + Args: + mean (float): mean of the distribution + std (float): standard variance of the distribution + ''' + singa.floatGaussian(float(mean), float(std), self.singa_tensor) + + def uniform(self, low, high): + '''Generate a value for each element following a uniform distribution. 
+ + Args: + low (float): the lower bound + high (float): the hight bound + ''' + singa.floatUniform(float(low), float(high), self.singa_tensor) + + def add_column(self, v): + '''Add a tensor to each column of this tensor. + + Args: + v (Tensor): a Tensor to be added as a column to this tensor. + ''' + singa.AddColumn(v.singa_tensor, self.singa_tensor) + + def add_row(self, v): + '''Add a tensor to each row of this tensor. + + Args: + v (Tensor): a Tensor to be added as a row to this tensor. + ''' + singa.AddRow(v.singa_tensor, self.singa_tensor) + + def div_column(self, v): + '''Divide each column of this tensor by v. + + Args: + v (Tensor): 1d tensor of the same length the column of self. + ''' + singa.DivColumn(v.singa_tensor, self.singa_tensor) + + def div_row(self, v): + '''Divide each row of this tensor by v. + + Args: + v (Tensor): 1d tensor of the same length the row of self. + ''' + singa.DivRow(v.singa_tensor, self.singa_tensor) + + def mult_column(self, v): + '''Multiply each column of this tensor by v element-wisely. + + Args: + v (Tensor): 1d tensor of the same length the column of self. + ''' + singa.MultColumn(v.singa_tensor, self.singa_tensor) + + def mult_row(self, v): + '''Multiply each row of this tensor by v element-wisely. + + Args: + v (Tensor): 1d tensor of the same length the row of self. + ''' + singa.MultRow(v.singa_tensor, self.singa_tensor) + + ''' + python operators (+=, -=, *=, /=) for singa::Tensor unary operators + ''' + + def __iadd__(self, x): + ''' inplace element-wise addition with a tensor or a float value. + + Args: + x (float or Tensor): + ''' + if isinstance(x, Tensor): + self.singa_tensor += x.singa_tensor + else: + self.singa_tensor += float(x) + return self + + def __isub__(self, x): + ''' inplace element-wise subtraction with a tensor or a float value. + + Args: + x (float or Tensor): + ''' + + if isinstance(x, Tensor): + self.singa_tensor -= x.singa_tensor + else: + self.singa_tensor -= float(x) + return self + + def __imul__(self, x): + ''' inplace element-wise multiplication with a tensor or a float value. + + Args: + x (float or Tensor): + ''' + if isinstance(x, Tensor): + self.singa_tensor *= x.singa_tensor + else: + self.singa_tensor *= float(x) + return self + + def __idiv__(self, x): + ''' inplace element-wise division by a tensor or a float value. 
+ + Args: + x (float or Tensor): + ''' + if isinstance(x, Tensor): + self.singa_tensor /= x.singa_tensor + else: + self.singa_tensor /= float(x) + return self + + ''' + python operators (+, -, *, /, <, <=, >, >=) for singa binary operators + ''' + + def __add__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.Add_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.Add_Tf, + self.singa_tensor, rhs) + + def __sub__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.Sub_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.Sub_Tf, + self.singa_tensor, rhs) + + def __mul__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.EltwiseMul_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.EltwiseMul_Tf, + self.singa_tensor, rhs) + + def __div__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.Div_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.Div_Tf, + self.singa_tensor, rhs) + + def __lt__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.LT_TT, self.singa_tensor, + rhs.singa_tensor) + else: + return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs) + + def __le__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func( + singa.LE_TT, + self.singa_tensor, + rhs.singa_tensor) + else: + return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs) + + def __gt__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func( + singa.GT_TT, + self.singa_tensor, + rhs.singa_tensor) + else: + return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs) + + def __ge__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func( + singa.GE_TT, + self.singa_tensor, + rhs.singa_tensor) + else: + return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs) + + +''' python functions for global functions in Tensor.h +''' + + +def from_raw_tensor(t): + x = Tensor(t.shape(), t.device(), t.data_type()) + x.singa_tensor = t + return x + + +def from_raw_tensors(tt): + ret = [] + for t in list(tt): + ret.append(from_raw_tensor(t)) + return ret + + +def product(shape): + return reduce(lambda x, y: x * y, shape) + + +def sizeof(dtype): + ''' + Returns: + the number of bytes of the given SINGA data type defined in core.proto + ''' + return singa.SizeOf(dtype) + + +def reshape(t, s): + '''Reshape the input tensor with the given shape. + + Args: + t (Tensor): the tensor to be changed + s (list<int>): the new shape, which should have the same volumn as the + old shape. + + Returns: + the new Tensor + ''' + return _call_singa_func(singa.Reshape, t.singa_tensor, s) + + +def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0): + '''Copy the data between two Tensor instances which could be on different + devices. + + Args: + dst (Tensor): destination Tensor + src (Tensor): source Tensor + size (int) : number of elements to copy + dst_offset (int): offset in terms of elements to the start of dst + src_offset (int): offset in terms of elements to the start of src + ''' + singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size, + dst_offset, src_offset) + + +def from_numpy(np_array): + '''Create a Tensor instance with the shape, dtype and values from the numpy + array. + + Args: + np_array: the numpy array. + + Returns: + A Tensor instance allocated on the default CppCPU device. 
+ ''' + ret = Tensor(np_array.shape) + ret.copy_from_numpy(np_array) + return ret + + +def to_numpy(t): + '''Convert the tensor into a numpy array. + + Since numpy array is allocated on CPU devices, the input Tensor instance + must be on the default CppCPU device. + + Args: + t (Tensor), a Tensor on the default CppCPU device. + + Returns: + a numpy array + ''' + assert (t.device.id() == -1) or (t.device is None), \ + 'Please move the tensor onto the default host device' + + if t.dtype == core_pb2.kFloat32: + np_array = t.singa_tensor.floatGetValue(int(t.size())) + elif t.dtype == core_pb2.kInt: + np_array = t.singa_tensor.intGetValue(int(t.size())) + else: + print 'Not implemented yet for ', t.dtype + return np_array.reshape(t.shape) + + +def abs(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = abs(x), x is an element of t + ''' + return _call_singa_func(singa.Abs, t.singa_tensor) + + +def exp(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = exp(x), x is an element of t + ''' + return _call_singa_func(singa.Exp, t.singa_tensor) + + +def log(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = log(x), x is an element of t + ''' + return _call_singa_func(singa.Log, t.singa_tensor) + + +def relu(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = x if x >0; otherwise 0; x is an element + of t + ''' + return _call_singa_func(singa.ReLU, t.singa_tensor) + + +def sigmoid(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = sigmoid(x); x is an element of t + ''' + return _call_singa_func(singa.Sigmoid, t.singa_tensor) + + +def square(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = x * x, x is an element of t + ''' + return _call_singa_func(singa.Square, t.singa_tensor) + + +def tanh(t): + ''' + Args: + t (Tensor): input Tensor + + Returns: + a new Tensor whose element y = tanh(x), x is an element of t + ''' + return _call_singa_func(singa.Tanh, t.singa_tensor) + + +def sum(t, axis=None): + '''Sum elements of the input tensor long the given axis. + + Args: + t (Tensor): input Tensor + axis (int, optional): if None, the summation is done over all elements; + if axis is provided, then it is calculated along the given axis, + e.g. 0 -- sum each column; 1 -- sum each row. + + Returns: + a float value as the sum of all elements, or a new Tensor + ''' + + if axis is None: + return singa.floatSum(t.singa_tensor) + else: + return _call_singa_func(singa.Sum, t.singa_tensor, axis) + + +def pow(t, x, out=None): + ''' + Args: + t (Tensor): input tensor + x (float or Tensor): y[i] = t[i]^x if x is a float value; otherwise, + y[i]= t[i]^x[i] if x is a tensor. + out (None or Tensor): if None, a new Tensor would be constructed to + store the result; otherwise, the result is put into out. + + Returns: + the result tensor. + ''' + if out is None: + if isinstance(x, Tensor): + return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor) + else: + return _call_singa_func(singa.Pow_f, t.singa_tensor, x) + else: + if isinstance(x, Tensor): + singa.Pow(t.singa_tensor, x.singa_tensor, out.singa_tensor) + else: + singa.Pow_f_out(t.singa_tensor, x, out.singa_tensor) + return out + + +def average(t, axis=None): + ''' + Args: + t (Tensor): input Tensor + axis (int, optional): if None, average all elements; otherwise average + along the given dimension. 
0 for averaging each column; 1 for + averaging each row. + + Returns: + a float value if axis is None; otherwise, a new Tensor for the result. + ''' + if t.ndim() > 1: + return _call_singa_func(singa.Average, t.singa_tensor, axis) + else: + return singa.floatSum(t.singa_tensor) / t.size() + + +def softmax(t, out=None): + '''Apply SoftMax for each row of the Tensor. + + Args: + t (Tensor): the input 1d or 2d tensor + out (Tensor, optional): if not None, it is used to store the result + + Returns: + the result Tensor + ''' + if out is None: + return _call_singa_func(singa.SoftMax, t.singa_tensor) + else: + singa.SoftMax(t.singa_tensor, out.singa_tensor) + return out + + +def lt(t, x): + '''Elementi-wise comparison for t < x + + Args: + t (Tensor): left hand side operand + x (Tensor or float): right hand side operand + + Returns: + a Tensor with each element being t[i] < x ? 1.0f:0.0f, + or t[i] < x[i] ? 1.0f:0.0f + ''' + return t < x + + +def le(t, x): + '''Elementi-wise comparison for t <= x. + + Args: + t (Tensor): left hand side operand + x (Tensor or float): right hand side operand + + Returns: + a Tensor with each element being t[i] <= x ? 1.0f:0.0f, + or t[i] <= x[i] ? 1.0f:0.0f + ''' + return t <= x + + +def gt(t, x): + '''Elementi-wise comparison for t > x. + + Args: + t (Tensor): left hand side operand + x (Tensor or float): right hand side operand + + Returns: + a Tensor with each element being t[i] > x ? 1.0f:0.0f, + or t[i] > x[i] ? 1.0f:0.0f + ''' + return t > x + + +def ge(t, x): + '''Elementi-wise comparison for t >= x. + + Args: + t (Tensor): left hand side operand + x (Tensor or float): right hand side operand + + Returns: + a Tensor with each element being t[i] >= x ? 1.0f:0.0f, + or t[i] >= x[i] ? 1.0f:0.0f + ''' + return t >= x + + +def add(lhs, rhs, ret=None): + '''Elementi-wise addition. + + Args: + lhs (Tensor) + rhs (Tensor) + ret (Tensor, optional): if not None, the result is stored in it; + otherwise, a new Tensor would be created for the result. + + Returns: + the result Tensor + ''' + if ret is None: + # call Tensor.__add__() + return lhs + rhs + else: + if isinstance(rhs, Tensor): + singa.Add(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + else: + singa.Add_Tf_out(lhs.singa_tensor, rhs, ret.singa_tensor) + return ret + + +def sub(lhs, rhs, ret=None): + '''Elementi-wise subtraction. + + Args: + lhs (Tensor) + rhs (Tensor) + ret (Tensor, optional): if not None, the result is stored in it; + otherwise, a new Tensor would be created for the result. + + Returns: + the result Tensor + ''' + if ret is None: + # call Tensor.__sub__() + return lhs - rhs + else: + if isinstance(rhs, Tensor): + singa.Sub(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + else: + singa.Sub_Tf_out(lhs.singa_tensor, rhs, ret.singa_tensor) + return ret + + +def eltwise_mult(lhs, rhs, ret=None): + '''Elementi-wise multiplication. + + Args: + lhs (Tensor) + rhs (Tensor) + ret (Tensor, optional): if not None, the result is stored in it; + otherwise, a new Tensor would be created for the result. + + Returns: + the result Tensor + ''' + + if ret is None: + # call Tensor.__mul__() + return lhs * rhs + else: + if isinstance(rhs, Tensor): + singa.EltwiseMult(lhs.singa_tensor, rhs.singa_tensor, + ret.singa_tensor) + else: + singa.EltwiseMult_Tf_out(lhs.singa_tensor, rhs, + ret.singa_tensor) + return ret + + +def mult(A, B, C=None, alpha=1.0, beta=0.0): + '''Do matrix-matrix or matrix-vector multiplication. 
+ + This function returns C = alpha * A * B + beta * C + + Args: + A (Tensor): 2d Tensor + B (Tensor): If B is a 1d Tensor, GEMV would be invoked for matrix-vector + multiplication; otherwise GEMM would be invoked. + C (Tensor, optional): for storing the result; If None, a new Tensor + would be created. + alpha (float) + beta (float) + + Returns: + the result Tensor + ''' + if C is None: + return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor) + else: + singa.floatMult(alpha, A.singa_tensor, B.singa_tensor, + beta, C.singa_tensor) + return C + + +def div(lhs, rhs, ret=None): + '''Elementi-wise division. + + Args: + lhs (Tensor) + rhs (Tensor) + ret (Tensor, optional): if not None, the result is stored in it; + otherwise, a new Tensor would be created for the result. + + Returns: + the result Tensor + ''' + if ret is None: + # call Tensor.__div__() + return lhs / rhs + else: + if isinstance(rhs, Tensor): + singa.Div(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + else: + singa.Div_Tf_out(lhs.singa_tensor, rhs, ret.singa_tensor) + return ret + + +def axpy(alpha, x, y): + '''Element-wise operation for y += alpha * x. + + Args: + alpha (float) + x (Tensor) + y (Tensor) + + Returns: + y + ''' + singa.floatAxpy(float(alpha), x.singa_tensor, y.singa_tensor) + return y + + +def bernoulli(p, t): + '''Generate a binary value for each element of t. + + Args: + p (float): each element is 1 with probability p; and 0 with 1 - p + t (Tensor): the results are put into t + + Returns: + t + ''' + singa.floatBernoulli(float(p), t.singa_tensor) + return t + + +def gaussian(mean, std, t): + '''Generate values following a Gaussian distribution. + + Args: + mean (float): the mean of the Gaussian distribution. + std (float): the standard variance of the Gaussian distribution. + t (Tensor): the results are put into t + + Returns: + t + ''' + singa.floatGaussian(float(mean), float(std), t.singa_tensor) + return t + + +def uniform(low, high, t): + '''Generate values following a Uniform distribution. + + Args: + low (float): the lower bound + hight (float): the higher bound + t (Tensor): the results are put into t + + Returns: + t + ''' + singa.floatUniform(float(low), float(high), t.singa_tensor) + return t + + +def add_column(alpha, v, beta, M): + '''Add v to each column of M. + + Denote each column of M as m, m = alpha * v + beta * m + + Args: + alpha (float) + v (Tensor) + beta (float) + M (Tensor): 2d tensor + Returns: + M + ''' + singa.floatAddColumn(float(alpha), float(beta), v.singa_tensor, + M.singa_tensor) + return M + + +def add_row(alpha, v, beta, M): + '''Add v to each row of M. + + Denote each row of M as m, m = alpha * v + beta * m + + Args: + alpha (float) + v (Tensor) + beta (float) + M (Tensor): 2d tensor + Returns: + M + ''' + singa.floatAddRow(alpha, beta, v.singa_tensor, M.singa_tensor) + return M + + +def sum_columns(M): + '''Sum all columns into a single column. + + Args: + M (Tensor): the input 2d tensor. + + Returns: + a new Tensor as the resulted column. + ''' + assert M.ndim() == 2, 'M.nDim() is supposed to be 2' + ret = Tensor((M.shape[0], 1)) + singa.SumColumns(M.singa_tensor, ret.singa_tensor) + return ret + + +def sum_rows(M): + '''Sum all rows into a single row. + + Args: + M (Tensor): the input 2d tensor. + + Returns: + a new Tensor as the resulted row. 
+ ''' + assert M.ndim() == 2, 'M.nDim() is supposed to be 2' + ret = Tensor((1, M.shape[1])) + singa.SumRows(M.singa_tensor, ret.singa_tensor) + return ret + + +''' private functions, internally used +''' + + +def _call_singa_func(_singa_func, *args): + ''' this function calls singa global functions that returns Tensor + and create new python Tensor instance + e.g., Tensor [singa_func](args...) + ''' + new_t = Tensor() + new_t.singa_tensor = _singa_func(*args) + new_t.shape = tuple(new_t.singa_tensor.shape()) + new_t.device = new_t.singa_tensor.device() + new_t.dtype = new_t.singa_tensor.data_type() + return new_t http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/python/singa/utils.py ---------------------------------------------------------------------- diff --git a/python/singa/utils.py b/python/singa/utils.py new file mode 100644 index 0000000..a192cff --- /dev/null +++ b/python/singa/utils.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import sys + + +def update_progress(progress, info): + """Display progress bar and user info. + + Args: + progress (float): progress [0, 1], negative for halt, and >=1 for done. + info (str): a string for user provided info to be displayed. + """ + barLength = 20 # bar length + status = "" + if isinstance(progress, int): + progress = float(progress) + if not isinstance(progress, float): + progress = 0 + status = "error: progress var must be float. " + if progress < 0: + progress = 0 + status = "Halt. " + if progress >= 1: + progress = 1 + status = "Done. 
" + status = status + info + block = int(round(barLength*progress)) + text = "[{0}] {1:3.1f}% {2}".format("."*block + " "*(barLength-block), + progress*100, status) + sys.stdout.write(text) + sys.stdout.write('\b'*(9 + barLength + len(status))) + sys.stdout.flush() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4a88f5..0752884 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,10 +20,9 @@ FILE(GLOB proto_files proto/*.proto) protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files}) -INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include") +#MESSAGE(STATUS "proto_srcs: ${proto_srcs}") LIST(APPEND singa_sources ${proto_hdrs} ${proto_srcs}) -SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS}) AUX_SOURCE_DIRECTORY(utils utils_source) LIST(APPEND singa_sources ${utils_source}) @@ -32,6 +31,8 @@ AUX_SOURCE_DIRECTORY(core/device core_source) AUX_SOURCE_DIRECTORY(core/memory core_source) AUX_SOURCE_DIRECTORY(core/scheduler core_source) AUX_SOURCE_DIRECTORY(core/tensor core_source) +LIST(APPEND singa_sources ${core_source}) + IF (USE_CUDA) FILE(GLOB_RECURSE cuda_source core "*.cu") SET(FLAGS_BACKUP ${CMAKE_CXX_FLAGS}) @@ -45,7 +46,8 @@ IF (USE_CUDA) include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor") SET(CMAKE_CXX_FLAGS ${FLAGS_BACKUP}) ENDIF (USE_CUDA) -LIST(APPEND singa_sources ${core_source} ${cuda_objs}) + +SET(global_cuda_objs ${cuda_objs} PARENT_SCOPE) AUX_SOURCE_DIRECTORY(model model_source) AUX_SOURCE_DIRECTORY(model/layer model_source) @@ -58,7 +60,7 @@ LIST(APPEND singa_sources ${model_source}) AUX_SOURCE_DIRECTORY(io io_source) AUX_SOURCE_DIRECTORY(io/network io_source) LIST(APPEND singa_sources ${io_source}) -ADD_LIBRARY(singa SHARED ${singa_sources}) + ADD_CUSTOM_TARGET( copy_protobuf COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto" @@ -71,59 +73,15 @@ FOREACH(fil ${proto_hdrs}) COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto" ) ENDFOREACH() -ADD_DEPENDENCIES(singa copy_protobuf) -TARGET_LINK_LIBRARIES(singa ${SINGA_LINKER_LIBS}) -#MESSAGE(STATUS "HEADERS: ${proto_hdrs}") - -IF(USE_PYTHON) - - protobuf_generate_python(proto_pys ${proto_files}) - #MESSAGE(STATUS "proto pys: ${proto_pys}") - FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i") - CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i.in" "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i") - - FILE(GLOB python_files python/swig/singa.i) - # delete old .cxx file - FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/singa_wrap.cxx") - # generate cxx and wrap.py - swig_generate_cxx(python_srcs ${python_files}) +ADD_LIBRARY(singa_objects OBJECT ${singa_sources}) +ADD_DEPENDENCIES(singa_objects copy_protobuf) - #FILE(COPY python/ DESTINATION ${CMAKE_BINARY_DIR}/python/singa FILES_MATCHING PATTERN "swig" EXCLUDE PATTERN "*.py") - #Create symlinks for all python source files Do not omit !!!RELATIVE!!! 
- file(GLOB_RECURSE python_source_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.py) - - create_symlinks(${python_source_files}) - - ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${singa_sources} ${cuda_objs} ${proto_pys}) - SET(WRAPPER_LINKER_LIBS "${PREVIOUS_LINKER_LIBS}") - TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS} ${PYTHON_LIBRARIES}) - TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS}) - ADD_DEPENDENCIES(_singa_wrap singa ) - #message(STATUS "PREVIOUS_LINKER_LIBS ${PREVIOUS_LINKER_LIBS}") - - SET_TARGET_PROPERTIES(_singa_wrap - PROPERTIES PREFIX "" - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/python/singa - ) - - #SETUP - SET(SETUP_PY_IN "python/setup.py.in") - SET(SETUP_PY "${CMAKE_BINARY_DIR}/python/setup.py") - CONFIGURE_FILE(${SETUP_PY_IN} ${SETUP_PY}) - - #create python/singa/proto/__init__.py - FILE(WRITE ${CMAKE_BINARY_DIR}/python/singa/proto/__init__.py "") - #MESSAGE(STATUS "apple: ${APPLE}") - IF(APPLE) - ADD_CUSTOM_TARGET( - change_suffix ALL - COMMAND ${CMAKE_COMMAND} -E rename "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.dylib" "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.so" - COMMENT "change .dylib to .so in mac system" - ) - ADD_DEPENDENCIES(change_suffix _singa_wrap) - ENDIF(APPLE) +ADD_LIBRARY(singa SHARED $<TARGET_OBJECTS:singa_objects> ${cuda_objs}) +TARGET_LINK_LIBRARIES(singa ${SINGA_LINKER_LIBS}) -ENDIF(USE_PYTHON) +#pass configure infor to swig +FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/api/config.i") +CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/api/config.i.in" "${CMAKE_CURRENT_SOURCE_DIR}/api/config.i") http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/config.i ---------------------------------------------------------------------- diff --git a/src/api/config.i b/src/api/config.i new file mode 100644 index 0000000..cfbcd46 --- /dev/null +++ b/src/api/config.i @@ -0,0 +1,4 @@ +// Pass in cmake configurations to swig +#define USE_CUDA 1 +#define USE_CUDNN 1 +#define CUDNN_VERSION_SWIG 5005 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/config.i.in ---------------------------------------------------------------------- diff --git a/src/api/config.i.in b/src/api/config.i.in new file mode 100644 index 0000000..5743ba3 --- /dev/null +++ b/src/api/config.i.in @@ -0,0 +1,4 @@ +// Pass in cmake configurations to swig +#cmakedefine01 USE_CUDA +#cmakedefine01 USE_CUDNN +#cmakedefine CUDNN_VERSION_SWIG ${CUDNN_VERSION_SWIG} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/core_device.i ---------------------------------------------------------------------- diff --git a/src/api/core_device.i b/src/api/core_device.i new file mode 100644 index 0000000..b3521be --- /dev/null +++ b/src/api/core_device.i @@ -0,0 +1,69 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +/*interface file for swig */ + +%module core_device +%include "std_vector.i" +%include "std_string.i" +%include "std_pair.i" +%include "std_shared_ptr.i" + +%{ +#include "singa/core/device.h" +%} + +/* smart pointer to avoid memory leak */ +%shared_ptr(singa::Device); + +namespace std{ +%template(sizePair) std::pair<size_t, size_t>; +%template(vectorPair) std::vector<std::pair<size_t, size_t>>; +%template(vectorSharedPtr) std::vector<std::shared_ptr<singa::Device>>; +} + +namespace singa{ + +class Device { + public: + virtual void SetRandSeed(unsigned seed) = 0; + std::shared_ptr<Device> host(); + int id() const; +}; + +class Platform { + public: +#if USE_CUDA + static int GetNumGPUs(); + static const std::vector<int> GetGPUIDs(); + static const std::pair<size_t, size_t> GetGPUMemSize(const int device); + static const std::vector<std::pair<size_t, size_t>> GetGPUMemSize(); + static const std::string DeviceQuery(int id, bool verbose = false); + static const std::vector<std::shared_ptr<Device> > + CreateCudaGPUs(const size_t num_devices, size_t init_size = 0); + static const std::vector<std::shared_ptr<Device>> + CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0); +#endif // USE_CUDA + static std::shared_ptr<Device> GetDefaultDevice(); +}; + +} + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/core_tensor.i ---------------------------------------------------------------------- diff --git a/src/api/core_tensor.i b/src/api/core_tensor.i new file mode 100644 index 0000000..60f8b45 --- /dev/null +++ b/src/api/core_tensor.i @@ -0,0 +1,371 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +/*interface file for swig */ + +%module core_tensor +%include "std_vector.i" +%include "std_string.i" +%include "std_shared_ptr.i" + +/* +%include "carrays.i" +%array_class(float, floatArray); +%array_class(int, intArray); +%array_class(char, charArray); +%array_class(double, doubleArray); +*/ + +%{ +#define SWIG_FILE_WITH_INIT +#include "singa/core/tensor.h" +#include "singa/core/device.h" +#include "singa/proto/core.pb.h" +#include "singa/proto/model.pb.h" +using singa::DataType; +%} +%shared_ptr(singa::Device) + +%include "numpy.i" +%init %{ + import_array(); +%} +%apply (float *IN_ARRAY1, int DIM1) { + (const float *src, const size_t num) +} +%apply (int *IN_ARRAY1, int DIM1) { + (const int *src, const size_t num) +} +%apply (float *ARGOUT_ARRAY1, int DIM1) { + (float *value, const size_t num) +} +%apply (int *ARGOUT_ARRAY1, int DIM1) { + (int *value, const size_t num) +} + +%template(Shape) std::vector<size_t>; + +namespace singa{ + + enum DataType { + kFloat32, kFloat16, kInt, kChar, kDouble + }; + + inline size_t Product(const std::vector<size_t> &shape, + int start = 0, size_t len = 0); + inline size_t SizeOf(DataType t); + + + class Tensor { + + public: + Tensor(); + explicit Tensor(const std::vector<size_t> &shape, + DataType dtype = kFloat32); + Tensor(const std::vector<size_t> &shape, + std::shared_ptr<singa::Device> dev, DataType dtype = kFloat32); + Tensor(const Tensor &from); + + std::shared_ptr<singa::Device> device() const; +/* + template <typename DType> const DType* data() const; + %template(floatData) data<float>; + %template(intData) data<int>; + %template(charData) data<char>; + %template(doubleData) data<double>; + */ + + template <typename SType> void GetValue(SType* value, const size_t num); + %template(floatGetValue) GetValue<float>; + %template(intGetValue) GetValue<int>; + + const DataType data_type() const; + const std::vector<size_t> &shape() const; + const size_t shape(size_t idx) const; + size_t nDim() const; + bool transpose() const; + size_t Size() const; + size_t MemSize() const; + void Reshape(const std::vector<size_t> &shape); + void ResetLike(const Tensor &t); + void AsType(DataType type); + void ToDevice(std::shared_ptr<singa::Device> dev); + void ToHost(); + float L2() const; + float L1() const; + + template <typename SType> void SetValue(const SType x); + %template(floatSetValue) SetValue<float>; + /* TODO(chonho-01) other types */ + // --- other types + + template <typename DType> void CopyDataFromHostPtr(const DType *src, + const size_t num, + const size_t offset = 0); + %template(floatCopyDataFromHostPtr) CopyDataFromHostPtr<float>; + %template(intCopyDataFromHostPtr) CopyDataFromHostPtr<int>; + // --- other types + + void CopyData(const Tensor &other); + Tensor Clone() const; + Tensor T() const; + + /* python has no assignment operator + Tensor &operator=(const Tensor &t); */ + Tensor &operator+=(const Tensor &t); + Tensor &operator-=(const Tensor &t); + Tensor &operator*=(const Tensor &t); + Tensor &operator/=(const Tensor &t); + + + template <typename DType> Tensor &operator+=(const DType x); + %template(iAdd_f) operator+=<float>; + // --- other types + + template <typename DType> Tensor &operator-=(DType x); + %template(iSub_f) operator-=<float>; + // --- other types + + template <typename DType> Tensor &operator*=(DType x); + %template(iMul_f) operator*=<float>; + // --- other types + + template <typename DType> Tensor &operator/=(DType x); + %template(iDiv_f) 
operator/=<float>; + // --- other types + + + /*TODO(chonho-04) + amax + amin + asum + */ + + + }; + + void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num, + size_t src_offset = 0, size_t dst_offset = 0); + + Tensor Reshape(const Tensor &in, const std::vector<size_t> &s); + + Tensor Abs(const Tensor &t); + Tensor Exp(const Tensor &t); + Tensor Log(const Tensor &t); + Tensor ReLU(const Tensor &t); + Tensor Sigmoid(const Tensor &t); + Tensor Sign(const Tensor &t); + Tensor Sqrt(const Tensor &t); + Tensor Square(const Tensor &t); + Tensor Tanh(const Tensor &t); + + Tensor Sum(const Tensor &t, int axis); + template <typename SType> SType Sum(const Tensor &t); + %template(floatSum) Sum<float>; + // --- other types + + /* TODO(chonho-02) + need to implement the average of all elements ??? */ + Tensor Average(const Tensor &t, int axis); + Tensor SoftMax(const Tensor &t); + + + Tensor Pow(const Tensor &base, const Tensor &exp); + void Pow(const Tensor &base, const Tensor &exp, Tensor *out); + + %rename(Pow_f) Pow(const Tensor &in, const float x); + template <typename SType> + Tensor Pow(const Tensor &in, const SType x); + %template(pow_temp) Pow<float>; + + %rename(Pow_f_out) Pow(const Tensor &in, const float x, Tensor *out); + template <typename SType> + void Pow(const Tensor &in, const SType x, Tensor *out); + %template(pow_temp) Pow<float>; + + + /* rename comparison operators */ + %rename(LT_Tf) operator<(const Tensor &t, const float x); + %rename(LE_Tf) operator<=(const Tensor &t, const float x); + %rename(GT_Tf) operator>(const Tensor &t, const float x); + %rename(GE_Tf) operator>=(const Tensor &t, const float x); + %rename(LT_TT) operator<(const Tensor &lhs, const Tensor &rhs); + %rename(LE_TT) operator<=(const Tensor &lhs, const Tensor &rhs); + %rename(GT_TT) operator>(const Tensor &lhs, const Tensor &rhs); + %rename(GE_TT) operator>=(const Tensor &lhs, const Tensor &rhs); + + Tensor operator<(const Tensor &lhs, const Tensor &rhs); + Tensor operator<=(const Tensor &lhs, const Tensor &rhs); + Tensor operator>(const Tensor &lhs, const Tensor &rhs); + Tensor operator>=(const Tensor &lhs, const Tensor &rhs); + + + template <typename DType> + Tensor operator<(const Tensor &t, const DType x); + %template(op) operator< <float>; + // --- other types + + template <typename DType> + Tensor operator<=(const Tensor &t, const DType x); + %template(op) operator<= <float>; + // --- other types + + template <typename DType> + Tensor operator>(const Tensor &t, const DType x); + %template(op) operator> <float>; + // --- other types + + template <typename DType> + Tensor operator>=(const Tensor &t, const DType x); + %template(op) operator>= <float>; + // --- other types + + /* NOTE(chonho) + no need to include theses + in python, these can be replaced with comparison operators + + template <typename DType> + void LT(const Tensor &t, DType x, Tensor *ret); + template <typename DType> + void LE(const Tensor &t, DType x, Tensor *ret); + template <typename DType> + void GT(const Tensor &t, DType x, Tensor *ret); + template <typename DType> + void GE(const Tensor &t, DType x, Tensor *ret); + */ + + + /* ========== Arithmetic operations ========== */ + %rename(Add_TT) operator+(const Tensor &lhs, const Tensor &rhs); + %rename(Sub_TT) operator-(const Tensor &lhs, const Tensor &rhs); + %rename(EltwiseMul_TT) operator*(const Tensor &lhs, const Tensor &rhs); + %rename(Div_TT) operator/(const Tensor &lhs, const Tensor &rhs); + Tensor operator+(const Tensor &lhs, const Tensor &rhs); + Tensor operator-(const 
Tensor &lhs, const Tensor &rhs); + Tensor operator*(const Tensor &lhs, const Tensor &rhs); + Tensor operator/(const Tensor &lhs, const Tensor &rhs); + + %rename(Add_Tf) operator+(const Tensor &t, float x); + template <typename DType> + Tensor operator+(const Tensor &t, DType x); + %template(op) operator+<float>; + // --- other types + + %rename(Sub_Tf) operator-(const Tensor &t, float x); + template <typename DType> + Tensor operator-(const Tensor &t, DType x); + %template(op) operator-<float>; + // --- other types + + %rename(EltwiseMul_Tf) operator*(const Tensor &t, float x); + template <typename DType> + Tensor operator*(const Tensor &t, DType x); + %template(op) operator*<float>; + // --- other types + + %rename(Div_Tf) operator/(const Tensor &t, float x); + template <typename DType> + Tensor operator/(const Tensor &t, DType x); + %template(op) operator/<float>; + // --- other types + + void Add(const Tensor &lhs, const Tensor &rhs, Tensor *ret); + void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *ret); + void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *ret); + void Div(const Tensor &lhs, const Tensor &rhs, Tensor *ret); + + template <typename DType> + void Add(const Tensor &t, DType x, Tensor *ret); + %template(Add_Tf_out) Add<float>; + // --- other types + + template <typename DType> + void Sub(const Tensor &t, DType x, Tensor *ret); + %template(Sub_Tf_out) Sub<float>; + // --- other types + + template <typename DType> + void EltwiseMult(const Tensor &t, DType x, Tensor *ret); + %template(EltwiseMult_Tf_out) EltwiseMult<float>; + // --- other types + + template <typename DType> + void Div(const Tensor &t, DType x, Tensor *ret); + %template(Div_Tf_out) Div<float>; + // --- other types + + + /* ========== Random operations ========== */ + template <typename SType> + void Bernoulli(const SType p, Tensor *out); + %template(floatBernoulli) Bernoulli<float>; + // --- other types + + template <typename SType> + void Gaussian(const SType mean, const SType std, Tensor *out); + %template(floatGaussian) Gaussian<float>; + // --- other types + + template <typename SType> + void Uniform(const SType low, const SType high, Tensor *out); + %template(floatUniform) Uniform<float>; + // --- other types + + /* ========== Blas operations ========== */ + template <typename SType> + void Axpy(SType alpha, const Tensor &in, Tensor *out); + %template(floatAxpy) Axpy<float>; + // --- other types + + Tensor Mult(const Tensor &A, const Tensor &B); + void Mult(const Tensor &A, const Tensor &B, Tensor *C); + template <typename SType> + void Mult(const SType alpha, const Tensor &A, const Tensor &B, + const SType beta, Tensor *C); + %template(floatMult) Mult<float>; + + void AddColumn(const Tensor &v, Tensor *M); + template <typename SType> + void AddColumn(const SType alpha, const SType beta, const Tensor &v, + Tensor *M); + %template(floatAddColumn) AddColumn<float>; + + void AddRow(const Tensor &v, Tensor *M); + template <typename SType> + void AddRow(const SType alpha, const SType beta, const Tensor &v, + Tensor *M); + %template(floatAddRow) AddRow<float>; + + void DivColumn(const Tensor &v, Tensor *M); + void DivRow(const Tensor &v, Tensor *M); + void MultColumn(const Tensor &v, Tensor *M); + void MultRow(const Tensor &v, Tensor *M); + void SubColumn(const Tensor &v, Tensor *M); + void SubRow(const Tensor &v, Tensor *M); + + void SumColumns(const Tensor &M, Tensor *v); + void SumRows(const Tensor &M, Tensor *v); + + Tensor SoftMax(const Tensor &in); + void SoftMax(const Tensor &in, Tensor 
*out); + +} + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/model_layer.i ---------------------------------------------------------------------- diff --git a/src/api/model_layer.i b/src/api/model_layer.i new file mode 100644 index 0000000..ae651d5 --- /dev/null +++ b/src/api/model_layer.i @@ -0,0 +1,102 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +/*interface file for swig */ + +%module model_layer +%include "std_vector.i" +%include "std_string.i" +%include "std_pair.i" +%include "std_shared_ptr.i" + + +%{ +#include "singa/model/layer.h" +#include "../src/model/layer/rnn.h" +#include "../src/model/layer/cudnn_rnn.h" +#include "singa/core/tensor.h" +#include "singa/proto/model.pb.h" +#include "singa/singa_config.h" +using singa::Tensor; +using singa::ParamSpec; +using singa::DataType; +using singa::Device; +using singa::LayerConf; +%} + +%shared_ptr(singa::Layer) +%shared_ptr(singa::RNN) +#if USE_CUDNN +%shared_ptr(singa::CudnnRNN) +#endif + +namespace std { + %template(strVector) vector<string>; + %template(paramVector) vector<singa::ParamSpec>; + %template(tensorVector) vector<singa::Tensor>; + %template(ttvecPair) pair<singa::Tensor, vector<singa::Tensor>>; + %template(tvecPair) pair<vector<singa::Tensor>, vector<singa::Tensor>>; +} + + +namespace singa { + +class Layer { + public: + Layer(); +// virtual void Setup(const std::vector<vector<size_t>>&, const string&); + void Setup(const std::vector<size_t>& in_sample_shape, + const std::string& proto_str); + virtual const std::vector<Tensor> param_values(); + virtual const std::vector<size_t> GetOutputSampleShape() const; + virtual void ToDevice(std::shared_ptr<Device> device); + virtual void AsType(DataType dtype); + virtual const Tensor Forward(int flag, const Tensor& input); + virtual const std::vector<Tensor> Forward( + int flag, const std::vector<Tensor>& inputs); + virtual const std::pair<Tensor, std::vector<Tensor>> Backward( + int flag, const Tensor& grad); + virtual const std::pair<std::vector<Tensor>, std::vector<Tensor>> + Backward(int flag, const vector<Tensor>& grads); +}; + +std::shared_ptr<Layer> CreateLayer(const std::string& type); +const std::vector<std::string> GetRegisteredLayers(); +class RNN : public Layer { +}; + +#if USE_CUDA && USE_CUDNN +#if CUDNN_VERSION_SWIG >= 5005 +class CudnnRNN : public RNN { + public: + // note: Must use std::vector instead of vector. 
+ const std::vector<Tensor> Forward(int flag, const std::vector<Tensor>& inputs) override; + const std::pair<std::vector<Tensor>, std::vector<Tensor>> Backward( + int flag, const std::vector<Tensor>& grads) override; + void ToDevice(std::shared_ptr<Device> device) override; + const std::vector<Tensor> param_values() override; + const std::vector<size_t> GetOutputSampleShape() const override; +}; + +#endif // CUDNN_VERSION_SWIG >= 5005 +#endif // USE_CUDA && USE_CUDNN +} + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/model_loss.i ---------------------------------------------------------------------- diff --git a/src/api/model_loss.i b/src/api/model_loss.i new file mode 100644 index 0000000..864ad88 --- /dev/null +++ b/src/api/model_loss.i @@ -0,0 +1,62 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +/*interface file for swig */ + +%module model_loss +%include "std_string.i" +%{ +#include "singa/model/loss.h" + using singa::Tensor; +%} + +namespace singa { +class Loss { +public: + Loss() = default; + virtual ~Loss() {} + + virtual Tensor Forward(int flag, const Tensor &prediction, + const Tensor &target) = 0; + + float Evaluate(int flag, const Tensor &prediction, const Tensor &target); + + /// Compute the gradients of the loss values w.r.t. the prediction. + virtual Tensor Backward() = 0; +}; + +class MSE : public Loss { +public: + Tensor Forward(int flag, const Tensor &prediction, const Tensor &target) + override; + + Tensor Backward() override; +}; + +class SoftmaxCrossEntropy : public Loss { +public: + Tensor Forward(int flag, const Tensor &prediction, const Tensor &target) + override; + + Tensor Backward() override; +}; + +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/model_metric.i ---------------------------------------------------------------------- diff --git a/src/api/model_metric.i b/src/api/model_metric.i new file mode 100644 index 0000000..9d93cd0 --- /dev/null +++ b/src/api/model_metric.i @@ -0,0 +1,43 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
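Picking up the model_layer and model_loss interfaces just above (the model_metric.i license header continues below), here is a rough Python sketch of the layer factory and loss APIs. The import names follow the %module directives and are assumptions about the build, as is the ability to pass a core_tensor Tensor across module boundaries (this requires the modules to share SWIG runtime type information). The layer type string is discovered at runtime rather than hard-coded, since no registry entries are listed in these files.

    import core_tensor    # from core_tensor.i earlier in this commit
    import model_layer
    import model_loss

    # Discover registered layer types and obtain an instance through the factory.
    types = model_layer.GetRegisteredLayers()
    print list(types)
    if len(types) > 0:
        lyr = model_layer.CreateLayer(types[0])    # shared_ptr<Layer> proxy

    # MSE only needs a prediction and a target of the same shape; the int flag
    # distinguishes training from evaluation, and 0 is used here as a placeholder.
    pred = core_tensor.Tensor([2, 3])
    target = core_tensor.Tensor([2, 3])
    pred.floatSetValue(0.5)
    target.floatSetValue(1.0)

    mse = model_loss.MSE()
    l = mse.Forward(0, pred, target)       # per-sample loss values
    print mse.Evaluate(0, pred, target)    # averaged scalar loss
    grad = mse.Backward()                  # gradient w.r.t. the prediction

Layer.Setup additionally takes the input sample shape and a serialized LayerConf string; building that string requires the model protobuf and is left out of this sketch.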
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +/*interface file for swig */ + +%module model_metric +%{ +#include "singa/model/metric.h" +using singa::Tensor; +%} + +namespace singa { +class Metric { + public: + Metric() = default; + virtual ~Metric() {} + virtual Tensor Forward(const Tensor& prediction, const Tensor& target) = 0; + float Evaluate(const Tensor& prediction, const Tensor& target); +}; +class Accuracy : public Metric { + public: + Tensor Forward(const Tensor& prediction, const Tensor& target); +}; + +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/model_optimizer.i ---------------------------------------------------------------------- diff --git a/src/api/model_optimizer.i b/src/api/model_optimizer.i new file mode 100644 index 0000000..78b30b8 --- /dev/null +++ b/src/api/model_optimizer.i @@ -0,0 +1,70 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +/*interface file for swig */ + +%module model_optimizer +%include "std_vector.i" +%include "std_string.i" +%include "std_pair.i" +%include "std_shared_ptr.i" + +%{ +#include "singa/model/optimizer.h" +#include "singa/proto/model.pb.h" +using singa::Tensor; +using singa::ParamSpec; +using singa::OptimizerConf; +%} + + +%shared_ptr(singa::Optimizer) +%shared_ptr(singa::Regularizer) +%shared_ptr(singa::Constraint) + +namespace singa { +class Optimizer { + public: + // Optimizer() = default; + virtual ~Optimizer() = default; + void Setup(const std::string& str); + virtual void Apply(int step, float lr, const std::string& name, + const Tensor& grad, Tensor& value) = 0; +}; +inline std::shared_ptr<Optimizer> CreateOptimizer(const std::string& type); + +class Constraint { + public: + Constraint() = default; + void Setup(const std::string& conf_str); + void Apply(int step, Tensor& grad, Tensor& value); +}; + +inline std::shared_ptr<Constraint> CreateConstraint(const std::string& type); + +class Regularizer { + public: + Regularizer() = default; + void Setup(const std::string& conf_str); + void Apply(int step, Tensor& grad, Tensor& value); +}; +inline std::shared_ptr<Regularizer> CreateRegularizer(const std::string& type); +}
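With model_metric.i and model_optimizer.i in place, the remaining pieces look roughly as follows from Python. The import names again follow the %module directives; the 'sgd' type string passed to CreateOptimizer and the use of OptimizerConf from the generated model_pb2 module are assumptions, not confirmed by these interface files.

    import model_metric
    import model_optimizer
    from singa.proto import model_pb2              # assumed to provide OptimizerConf

    acc = model_metric.Accuracy()
    # acc.Evaluate(prediction, target) returns the averaged accuracy, and
    # acc.Forward(prediction, target) the per-sample values.

    opt = model_optimizer.CreateOptimizer('sgd')   # 'sgd' is an assumed registry key
    opt.Setup(model_pb2.OptimizerConf().SerializeToString())
    # For every parameter: opt.Apply(step, lr, name, grad, value) updates
    # 'value' in place using 'grad'; Constraint and Regularizer follow the
    # same Setup/Apply pattern with their own conf messages.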

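Finally, a small numpy round trip through the core_tensor bindings from earlier in this commit: the numpy.i typemaps fold the (src, num) pair of CopyDataFromHostPtr into a single 1-D numpy array argument, and turn GetValue into a call that returns a freshly allocated array of the requested length. The import name is, as before, an assumption based on the %module directive.

    import numpy as np
    import core_tensor

    t = core_tensor.Tensor([2, 3])          # kFloat32 on the default device
    x = np.arange(6, dtype=np.float32)      # matches (const float *src, const size_t num)
    t.floatCopyDataFromHostPtr(x)           # the typemap supplies 'num' from the array length

    print t.L1(), t.L2()                    # scalar summaries exposed on the proxy
    sq = core_tensor.Square(t)              # element-wise op from the free functions above
    print core_tensor.floatSum(sq)          # reduce over all elements

    y = t.floatGetValue(t.Size())           # comes back as a 1-D numpy array
    print y.reshape(2, 3)

The Shape template declared in core_tensor.i is what lets the plain Python list stand in for std::vector<size_t> in the constructor call.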