http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/optimizer.py ---------------------------------------------------------------------- diff --git a/src/python/optimizer.py b/src/python/optimizer.py deleted file mode 100644 index 43b4c9d..0000000 --- a/src/python/optimizer.py +++ /dev/null @@ -1,330 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ============================================================================= -""" Python wrappers for optimizers implemented by C++.""" - -from . import singa_wrap as singa -import tensor -from proto import model_pb2 - - -class Optimizer(object): - """Base python optimizer. - - Usages: - 1. construct the optimizer - 2. (optional) register each parameter with its specs. - 3. use the optimizer to update parameter values given parameter - gradients and other optional info - """ - def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None, - momentum_gen=None, regularizer=None, constraint=None): - """Constructor. - - Args: - lr: a constant or a function that generates learning rate given a - step, which is mutually exclusive with 'lr_gen'. - momentum: a constant or a function that generates the momentum value - given a step. - decay (float): the coefficent for L2 regularizer, which is mutually - exclusive with 'regularizer'. - lr_gen (function): a function returns the learning rate given - the current training step. It is mutually exclusive with lr. If - both are not set, the apply_with_lr function should be used for - param updating. - momentum_gen (function): a function returns the momentum value given - the current training step. It is mutually exclusive with - momentum. - regularizer: an instance of Regularizer or RegularizerConf; If set, - regularization would be applied in apply_with_lr(). - Users can also do regularization outside. - constraint: an instance of Constraint or ConstraintConf; If set, - constraint would be applied inside apply_with_lr(). Users can - also do regularization outside. 
- """ - if lr is not None: - assert lr_gen is None, 'Cannot set lr and lr_gen at the same time' - - def lr_gen(step): - return lr - self.lr_gen = lr_gen - if momentum is not None: - assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\ - ' at the same time' - - def momentum_gen(step): - return momentum - self.momentum_gen = momentum_gen - if decay is not None: - assert regularizer is None, \ - 'Cannot set decay and regularizer at the same time' - regularizer = L2Regularizer(decay) - if regularizer is not None: - if type(regularizer) is model_pb2.RegularizerConf: - self.regularizer = CppRegularizer(regularizer) - else: - self.regularizer = regularizer - else: - self.regularizer = None - if constraint is not None: - if type(constraint) is model_pb2.ConstraintConf: - self.constraint = CppConstraint(constraint) - else: - self.constraint = constraint - else: - self.constraint = None - self.regularizers = {} - self.constraints = {} - - def register(self, name, specs): - """Register the param specs, including creating regularizer and - constraint per param object. Param specific regularizer and constraint - have higher priority than the global ones. - - Args: - name (str): parameter name - specs (ParamSpec): protobuf obj - """ - if specs.has_regularizer(): - self.regularizers[name] = CppRegularizer(specs.constraint) - if specs.has_constraint(): - self.constraints[name] = CppConstraint(specs.regularizer) - if specs.has_lr_mult(): - self.learning_rate_multiplier[name] = specs.lr_mult() - if specs.has_decay_mult(): - self.decay_multiplier[name] = specs.decay_mult() - - def apply_regularizer_constraint(self, value, grad, name=None, step=None): - """Apply regularization and constraint if available. - - If there are both global regularizer (constraint) and param specific - regularizer (constraint), it would use the param specific one. - - Args: - value (Tensor): parameter value Tensor - grad (Tensor): parameter gradient Tensor - name (string): to get parameter specific regularizer or constraint - step (int): some regularizer or constraint would use step - - Return: - the updated gradient Tensor - """ - if name is not None and name in self.constraints: - self.constraints[name].apply(value, grad, step) - elif self.constraint is not None: - self.constraint.apply(step, value, grad) - - if name is not None and name in self.regularizers: - self.regularizers[name].apply(value, grad, step) - elif self.regularizer is not None: - self.regularizer.apply(step, value, grad) - return grad - - def apply_with_lr(self, step, lr, grad, value, name=None): - """Do update with given learning rate. - - The subclass optimizer must override this function. - Args: - step (int): training step (could be iteration or epoch) - lr (float): learning rate - grad (Tensor): parameter gradient - value (Tesnor): parameter value - name (string): paramter name to retrieval parameter specific - updating rules (including regularizer and constraint) - - Return: - updated parameter value - """ - assert False, 'This is the base function, pls call the subclass func' - return value - - def apply(self, step, grad, value, name=None): - """Do update assume the learning rate generator is set. - - The subclass optimizer does not need to override this function. 
- Args: - step (int): training step (could be iteration or epoch) - grad (Tensor): parameter gradient - value (Tesnor): parameter value - name (string): paramter name to retrieval parameter specific - updating rules (including regularizer and constraint) - - Return: - updated parameter value - """ - - assert self.lr_gen is not None, 'Learning rate generator is not set.'\ - 'Either set the lr_gen in constructor or call apply_with_lr' - lr = self.lr_gen(step) - return self.apply_with_lr(step, lr, grad, value, name) - - -class SGD(Optimizer): - def __init__(self, lr=None, momentum=None, decay=None, **kwargs): - """The vallina Stochasitc Gradient Descent algorithm. - - See the base Optimizer for all arguments. - """ - super(SGD, self).__init__(lr, momentum, decay) - conf = model_pb2.OptimizerConf() - self.opt = singa.CreateOptimizer('SGD') - self.opt.Setup(conf.SerializeToString()) - - def apply_with_lr(self, step, lr, grad, value, name): - self.apply_regularizer_constraint(step, value, grad, name) - self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) - return value - - -class Nesterov(Optimizer): - def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs): - """The SGD with Nesterov momentum - - See the base Optimizer for all arguments. - """ - super(Nesterov, self).__init__(lr, momentum, decay, kwargs) - conf = model_pb2.OptimizerConf() - self.opt = singa.CreateOptimizer('Nesterov') - self.opt.Setup(conf.SerializeToString()) - - def apply_with_lr(self, step, lr, grad, value, name): - self.apply_regularizer_constraint(step, value, grad, name) - self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) - return value - - -class AdaGrad(Optimizer): - def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs): - """AdaGrad optimizer. - - See the base Optimizer for all constructor args. - Args: - epsilon (float): small number for preventing numeric error. - """ - super(RMSProp, self).__init__(lr, decay, **kwargs) - conf = model_pb2.OptimizerConf() - conf.delta = epsilon - self.opt = singa.CreateOptimizer('AdaGrad') - self.opt.Setup(conf.SerializeToString()) - - def apply_with_lr(self, step, lr, grad, value, name): - grad = self.apply_regularizer_constraint(step, value, grad, name) - self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) - return value - - -class RMSProp(Optimizer): - def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs): - """RMSProp optimizer. - - See the base Optimizer for all constructor args. - Args: - rho (float): float within [0, 1] - epsilon (float): small value for preventing numeric error - """ - super(RMSProp, self).__init__(lr, decay, kwargs) - conf = model_pb2.OptimizerConf() - conf.rho = rho - conf.delta = epsilon - self.opt = singa.CreateOptimizer('RMSProp') - self.opt.Setup(conf.SerializeToString()) - - def apply_with_lr(self, step, lr, grad, value, name): - grad = self.apply_regularizer_constraint(step, value, grad, name) - self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) - return value - - -class Regularizer(object): - """Base Python regularizer for parameter gradients. - """ - def apply(self, value, grad): - assert False, 'Not Implemented. Call the subclass function.' - return grad - - -class CppRegularizer(Regularizer): - """Wrapper for regularizer implemented using C++. - """ - def __init__(self, conf): - """Constructor. - - Args: - conf (RegularizerConf): protobuf message for the configuration. 
- """ - self.reg = singa.CreateRegularizer(conf.type) - self.reg.Setup(conf.SerializeToString()) - - def apply(self, step, value, grad): - self.reg.Apply(step, value.singa_tensor, grad.singa_tensor) - return grad - - -class L2Regularizer(Regularizer): - """L2 regularization""" - def __init__(self, coefficient): - """ - Args: - coefficient (float): regularization coefficient. - """ - self.coefficient = coefficient - - def apply(self, step, value, grad, coefficient=None): - if coefficient is None: - assert self.coefficient is not None, 'Must set the coefficient' - coefficient = self.coefficient - tensor.axpy(coefficient, value, grad) - return grad - - -class Constraint(object): - """Base Python constraint class for paramter gradients. - """ - def apply(self, step, value, grad): - return grad - - -class CppConstraint(Constraint): - """Wrapper for constraints implemented using C++. - """ - def __init__(self, conf): - """Constructor. - - Args: - conf (ConstraintConf): protobuf message for the configuration. - """ - self.constraint = singa.CreateConstraint(conf.type) - self.constraint.Setup(conf.SerializeToString()) - - def apply(self, step, value, grad): - self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor) - return grad - - -class L2Constraint(Constraint): - """Rescale the gradient to make the L2 norm <= a given threshold. - """ - def __init__(self, threshold=None): - self.threshold = threshold - - def apply(self, step, value, grad, threshold=None): - if threshold is None: - assert self.threshold is not None, 'Must set the threshold' - threshold = self.threshold - nrm = grad.nrm2() - grad *= threshold / nrm - return grad
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/setup.py.in ---------------------------------------------------------------------- diff --git a/src/python/setup.py.in b/src/python/setup.py.in index 739104d..b53e54b 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -38,38 +38,36 @@ setup( packages= ['singa', 'singa.proto'], - ''' - py_modules=["singa"], - - install_requires=['peppercorn'], - List additional groups of dependencies here (e.g. development - dependencies). You can install these using the following syntax, - for example: - $ pip install -e .[dev,test] - extras_require={ - 'dev': ['check-manifest'], - 'test': ['coverage'], - }, + #py_modules=["singa"], + + #install_requires=['peppercorn'], + #List additional groups of dependencies here (e.g. development + #dependencies). You can install these using the following syntax, + #for example: + #$ pip install -e .[dev,test] + #extras_require={ + # 'dev': ['check-manifest'], + # 'test': ['coverage'], + #}, + + #If there are data files included in your packages that need to be + #installed, specify them here. If using Python 2.6 or less, then these + #have to be included in MANIFEST.in as well. - If there are data files included in your packages that need to be - installed, specify them here. If using Python 2.6 or less, then these - have to be included in MANIFEST.in as well. - ''' package_data={ 'singa': ['_singa_wrap.so'], }, - ''' - Although 'package_data' is the preferred approach, in some case you may - need to place data files outside of your packages. See: - http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa - In this case, 'data_file' will be installed into '<sys.prefix>/my_data' - data_files=[('my_data', ['data/data_file'])], - - To provide executable scripts, use entry points in preference to the - "scripts" keyword. Entry points provide cross-platform support and allow - pip to create the appropriate form of executable for the target platform. - ''' + #Although 'package_data' is the preferred approach, in some case you may + #need to place data files outside of your packages. See: + #http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa + #In this case, 'data_file' will be installed into '<sys.prefix>/my_data' + #data_files=[('my_data', ['data/data_file'])], + + #To provide executable scripts, use entry points in preference to the + #"scripts" keyword. Entry points provide cross-platform support and allow + #pip to create the appropriate form of executable for the target platform. 
+ entry_points={ 'console_scripts': [ 'singa=singa:main', http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/__init__.py ---------------------------------------------------------------------- diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py new file mode 100644 index 0000000..d5e48bb --- /dev/null +++ b/src/python/singa/__init__.py @@ -0,0 +1,3 @@ +def main(): + """Entry point for the application script""" + print("Welcome to SINGA!") http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/device.py ---------------------------------------------------------------------- diff --git a/src/python/singa/device.py b/src/python/singa/device.py new file mode 100644 index 0000000..3db90bf --- /dev/null +++ b/src/python/singa/device.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +''' +This script includes Device class and its subclasses for python users +to call singa::Device and its methods. + +TODO(wangwei) implement py CudaGPU class. +''' + +from . import singa_wrap as singa + + +class Device(object): + """ Class and member functions for singa::Device. + + Create Device instances using the CreateXXXDevice. + """ + + def __init__(self, id, device): + """Device constructor given device ID. + + Args: + id (int): device ID. + device: swig shared_ptr<Device> + """ + self.id = id + self.singa_device = device + + def set_rand_seed(self, seed): + self.singa_device.SetRandSeed(seed) + + def get_host(self): + return self.singa_device.host() + + def get_id(self): + return self.singa_device.id() + + +def get_num_gpus(): + return singa.Platform.GetNumGPUs() + + +def get_gpu_ids(): + return singa.Platform.GetGPUIDs() + + +def get_gpu_mem_size(id): + return singa.Platform.GetGPUMemSize(id) + + +def device_query(id, verbose=False): + return singa.Platform.DeviceQuery(id, verbose) + + +def create_cuda_gpus(num): + return singa.Platform.CreateCudaGPUs(num) + + +def create_cuda_gpu(): + return singa.Platform.CreateCudaGPUs(1)[0] http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/initializer.py ---------------------------------------------------------------------- diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py new file mode 100644 index 0000000..15caed3 --- /dev/null +++ b/src/python/singa/initializer.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +"""Popular initialization methods for parameter values (Tensor ojects)""" + +import math + + +def uniform(t, low=0, high=1): + t.uniform(low, high) + + +def gaussian(t, mean=0, std=0.01): + t.gaussian(mean, std) + + +def xavier(t): + scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1])) + t.uniform(-scale, scale) + + +def glorot(t): + scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1])) + t.gaussian(0, 1) + t *= scale + + +def msra(t): + t.gaussian(0, math.sqrt(2.0 / t.shape[0])) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/layer.py ---------------------------------------------------------------------- diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py new file mode 100644 index 0000000..937a7e1 --- /dev/null +++ b/src/python/singa/layer.py @@ -0,0 +1,600 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +""" Python layers which wraps the C++ layers by providing easy to construct APIs +""" + +from sets import Set +from . import singa_wrap +from .proto import model_pb2 +import tensor + + +class Layer(object): + """Base Python layer class. + + Usages: + 1. construct layer without input_sample_shapes, goto 2; + construct layer with input_sample_shapes, goto 3; + 2. call setup to create the parameters and setup other meta fields + 3. call forward or access layer members + 4. call backward and get parameters for update + """ + + def __init__(self, name, **kwargs): + self.layer = None # layer converted by swig + self.name = name # TODO(wangwei) duplicate with self.conf.name + self.conf = model_pb2.LayerConf() + self.conf.name = name + self.param_specs = [] + self.has_setup = False + + def param_names(self): + names = [] + for x in self.param_specs: + names.append(x['name']) + return names + + def setup(self, in_shapes): + """Call the C++ setup function to create params and set some meta data. 
+ + Args: + in_shapes: if the layer accepts a single input Tensor, in_shapes is + a single tuple specifying the inpute Tensor shape; if the layer + accepts multiple input Tensor (e.g., the concatenation layer), + in_shapes is a tuple of tuples, each for one input Tensor shape + """ + if self.has_setup: + return + self.layer.Setup(list(in_shapes), + self.conf.SerializeToString()) + self.has_setup = True + + def get_output_sample_shape(self): + assert self.has_setup, \ + 'Must call setup() before get_output_sample_shape()' + return self.layer.GetOutputSampleShape() + + def param_values(self): + """Return param value tensors. + + Do not store these tensors as layer members because cpp Tensor could be + moved onto diff devices due to the change of layer device. However, the + py tensors would not update its internal cpp tensor automatically. + """ + return tensor.from_raw_tensors(self.layer.param_values()) + + def forward(self, flag, input): + assert self.has_setup, 'Must call setup() before forward()' + assert isinstance(input, tensor.Tensor), 'input must be py Tensor' + y = self.layer.Forward(flag, input.singa_tensor) + return tensor.from_raw_tensor(y) + + def backward(self, flag, grad): + assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor' + ret = self.layer.Backward(flag, grad.singa_tensor) + return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1]) + + def to_device(self, device): + self.layer.ToDevice(device) + + def as_type(self, dtype): + self.layer.AsType(dtype) + + def __copy__(self): + pass + + def __deepcopy__(self): + pass + + +class Conv2D(Layer): + + def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same', + engine='cudnn', cudnn_prefer='fatest', data_format='NCHW', + use_bias=True, W_specs=None, b_specs=None, + pad=None, input_sample_shape=None): + """Construct a layer for 2D convolution. + + Args: + nb_kernels (int): num of the channels (kernels) of the input Tensor + kernel: an integer or a pair of integers for kernel height and width + stride: an integer or a pair of integers for stride height and width + border_mode (string): padding mode, case in-sensitive, + 'valid' -> padding is 0 for height and width + 'same' -> padding is half of the kernel (floor), + the kernel must be odd number. + engine (string): implementation engin, could be 'cudnn' + (case insensitive) + cudnn_prefer (string): the preferred algorithm for cudnn convolution + which could be 'fatest', 'autotune', 'limited_workspace' and + 'no_workspace' + data_format (string): either 'NCHW' or 'NHWC' + use_bias (bool): True or False + pad: an integer or a pair of integers for padding height and width + W_specs (dict): used to specify the weight matrix specs, fields + include, + 'name' for parameter name + 'lr_mult' for learning rate multiplier + 'decay_mult' for weight decay multiplier + 'init' for init method, which could be 'gaussian', 'uniform', + 'xavier' and '' + 'std', 'mean', 'high', 'low' for corresponding init methods + TODO(wangwei) 'clamp' for gradient constraint, value is scalar + 'regularizer' for regularization, currently support 'l2' + b_specs (dict): hyper-parameters for bias vector, similar as W_specs + name (string): layer name. 
+ input_sample_shape: 3d tuple for the shape of the input Tensor + without the batchsize, e.g., (channel, height, width) or + (height, width, channel) + """ + super(Conv2D, self).__init__(name) + assert data_format == 'NCHW', 'Not supported data format: %s ' \ + 'only "NCHW" is enabled currently' % (data_format) + conf = self.conf.convolution_conf + conf.num_output = nb_kernels + conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad) + conf.bias_term = use_bias + # TODO(wangwei) enable data format for cpp code + # conf.data_format = data_format + if W_specs is None: + W_specs = {'init': 'xavier'} + if b_specs is None: + b_specs = {'init': 'constant'} + if 'name' not in W_specs: + W_specs['name'] = name + '_weight' + if 'name' not in b_specs: + b_specs['name'] = name + '_bias' + wspecs = _construct_param_specs_from_dict(W_specs) + self.conf.param.extend([wspecs]) + self.param_specs.append(wspecs) + bspecs = _construct_param_specs_from_dict(b_specs) + self.conf.param.extend([bspecs]) + self.param_specs.append(bspecs) + + _check_engine(engine, ['cudnn']) + self.layer = _create_layer(engine, 'Convolution') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class Conv1D(Conv2D): + + def __init__(self, name, nb_kernels, kernel=3, stride=1, + border_mode='same', engine='cudnn', cudnn_prefer='fatest', + use_bias=True, W_specs={'init': 'Xavier'}, + b_specs={'init': 'Constant', 'value': 0}, pad=None, + input_sample_shape=None): + """Construct a layer for 1D convolution. + + Most of the args are the same as those for Conv2D except the kernel, + stride, pad, which is a scalar instead of a tuple. + input_sample_shape is a tuple with a single value for the input feature + length + """ + pad = None + if pad is not None: + pad = (0, pad) + if input_sample_shape is not None: + input_sample_shape = (1, 1, input_sample_shape[0]) + super(Conv1D, self).__init__(name, nb_kernels, (1, kernel), (0, stride), + border_mode, engine, cudnn_prefer, + use_bias=use_bias, pad=pad, + W_specs=W_specs, b_specs=b_specs, + input_sample_shape=input_sample_shape) + + def get_output_sample_shape(self): + shape = self.layer.GetOutputSampleShape() + assert len(shape) == 3, 'The output sample shape should be 3D.'\ + 'But the length is %d' % len(shape) + return (shape[0], shape[2]) + + +class Pooling2D(Layer): + + def __init__(self, name, mode, kernel=3, stride=2, border_mode='same', + pad=None, data_format='NCHW', engine='cudnn', + input_sample_shape=None): + super(Pooling2D, self).__init__(name) + assert data_format == 'NCHW', 'Not supported data format: %s ' \ + 'only "NCHW" is enabled currently' % (data_format) + conf = self.conf.pooling_conf + conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad) + conf.pool = mode + _check_engine(engine, ['cudnn']) + self.layer = _create_layer(engine, 'Pooling') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class MaxPooling2D(Pooling2D): + + def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, + data_format='NCHW', engine='cudnn', input_sample_shape=None): + super(MaxPooling2D, self).__init__(name, model_pb2.PoolingConf.MAX, + kernel, stride, border_mode, + pad, data_format, engine, + input_sample_shape) + + +class AvgPooling2D(Pooling2D): + + def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, + data_format='NCHW', engine='cudnn', input_sample_shape=None): + super(AvgPooling2D, self).__init__(name, model_pb2.PoolingConf.AVE, + kernel, stride, border_mode, + pad, 
data_format, engine, + input_sample_shape) + + +class MaxPooling1D(MaxPooling2D): + + def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, + data_format='NCHW', engine='cudnn', input_sample_shape=None): + """Max pooling for 1D feature. + + Args: + input_sample_shape (tuple): 1D tuple for input feature length + """ + pad = None + if pad is not None: + pad = (0, pad) + if input_sample_shape is not None: + assert len(input_sample_shape) == 1, \ + 'AvgPooling1D expects input sample to be 1D' + input_sample_shape = (1, 1, input_sample_shape[0]) + else: + input_sample_shape = None + super(MaxPooling1D, self).__init__(name, (1, kernel), (0, stride), + border_mode, pad, + data_format, engine, + input_sample_shape) + + def get_output_sample_shape(self): + shape = self.layer.GetOutputSampleShape() + return (shape[2],) + + +class AvgPooling1D(AvgPooling2D): + + def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, + data_format='NCHW', engine='cudnn', input_sample_shape=None): + """input_feature_length is a scalar value""" + pad2 = None + if pad is not None: + pad2 = (pad, 0) + if input_sample_shape is not None: + assert len(input_sample_shape) == 1, \ + 'AvgPooling1D expects input sample to be 1D' + input_sample_shape = (1, 1, input_sample_shape[0]) + else: + input_sample_shape = None + + super(AvgPooling1D, self).__init__(name, (kernel, 1), (0, stride), + border_mode, pad2, + data_format, engine, + input_sample_shape) + + def get_output_sample_shape(self): + shape = self.layer.GetOutputSampleShape() + return (shape[2],) + + +class BatchNormalization(Layer): + # TODO(wangwei) add mode and epsilon arguments + + def __init__(self, name, momentum=0.9, engine='cudnn', + beta_specs=None, gamma_specs=None, input_sample_shape=None): + """Batch-normalization. + + Args: + momentum (float): for running average mean and variance. + beta_specs (dict): dictionary includes the fields for the beta + param: + 'name' for parameter name + 'lr_mult' for learning rate multiplier + 'decay_mult' for weight decay multiplier + 'init' for init method, which could be 'gaussian', 'uniform', + 'xavier' and '' + 'std', 'mean', 'high', 'low' for corresponding init methods + 'clamp' for gradient constraint, value is scalar + 'regularizer' for regularization, currently support 'l2' + gamma_specs (dict): similar to beta_specs, but for the gamma param. + name (string): layer name + input_sample_shape (tuple): with at least one integer + """ + super(BatchNormalization, self).__init__(name) + conf = self.conf.batchnorm_conf + conf.factor = momentum + if beta_specs is None: + beta_specs = {'init': 'Xavier'} + if gamma_specs is None: + gamma_specs = {'init': 'Xavier'} + if 'name' not in beta_specs: + beta_specs['name'] = name + '_beta' + if 'name' not in gamma_specs: + gamma_specs['name'] = name + '_gamma' + self.conf.param.extend([_construct_param_specs_from_dict(beta_specs)]) + self.conf.param.extend([_construct_param_specs_from_dict(gamma_specs)]) + self.param_specs.append(_construct_param_specs_from_dict(beta_specs)) + self.param_specs.append(_construct_param_specs_from_dict(gamma_specs)) + _check_engine(engine, ['cudnn']) + self.layer = _create_layer(engine, 'BatchNorm') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class LRN(Layer): + + def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel', + k=1, engine='cudnn', input_sample_shape=None): + """Local response normalization. + + Args: + size (int): # of channels to be crossed + normalization. 
+ mode (string): 'cross_channel' + input_sample_shape (tuple): 3d tuple, (channel, height, width) + """ + super(LRN, self).__init__(name) + conf = self.conf.lrn_conf + conf.local_size = size + conf.alpha = alpha + conf.beta = beta + conf.k = k + # TODO(wangwei) enable mode = 'within_channel' + assert mode == 'cross_channel', 'only support mode="across_channel"' + conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS + _check_engine(engine, ['cudnn']) + self.layer = _create_layer(engine, 'LRN') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class Dense(Layer): + + def __init__(self, name, num_output, use_bias=True, + W_specs=None, b_specs=None, + W_transpose=True, engine='cuda', input_sample_shape=None): + """Apply linear/affine transformation, also called inner-product or + fully connected layer. + + Args: + num_output (int): output feature length. + use_bias (bool): add a bias vector or not to the transformed feature + W_specs (dict): specs for the weight matrix + 'name' for parameter name + 'lr_mult' for learning rate multiplier + 'decay_mult' for weight decay multiplier + 'init' for init method, which could be 'gaussian', 'uniform', + 'xavier' and '' + 'std', 'mean', 'high', 'low' for corresponding init methods + 'clamp' for gradient constraint, value is scalar + 'regularizer' for regularization, currently support 'l2' + b_specs (dict): specs for the bias vector, same fields as W_specs. + W_transpose (bool): if true, output=x*W.T+b; + engine (string): could be 'cudnn', 'cuda' + input_sample_shape (tuple): input feature length + """ + super(Dense, self).__init__(name) + conf = self.conf.dense_conf + conf.num_output = num_output + conf.bias_term = use_bias + conf.transpose = W_transpose + if W_specs is None: + W_specs = {'init': 'xavier'} + if b_specs is None: + b_specs = {'init': 'constant'} + if 'name' not in W_specs: + W_specs['name'] = name + '_weight' + if 'name' not in b_specs: + b_specs['name'] = name + '_bias' + self.conf.param.extend([_construct_param_specs_from_dict(W_specs)]) + self.param_specs.append(_construct_param_specs_from_dict(W_specs)) + self.conf.param.extend([_construct_param_specs_from_dict(b_specs)]) + self.param_specs.append(_construct_param_specs_from_dict(b_specs)) + if engine == 'cudnn': + engine = 'cuda' + _check_engine(engine, ['cuda', 'cpp']) + self.layer = _create_layer(engine, 'Dense') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class Dropout(Layer): + + def __init__(self, name, p=0.5, engine='cuda', input_sample_shape=None): + """Droput layer. + + Args: + p (float): probability for dropping out the element, i.e., set to 0 + engine (string): 'cudnn' for cudnn version>=5; or 'cuda' + name (string): layer name + """ + super(Dropout, self).__init__(name) + conf = self.conf.dropout_conf + conf.dropout_ratio = p + # 'cudnn' works for v>=5.0 + # if engine.lower() == 'cudnn': + # engine = 'cuda' + _check_engine(engine, ['cudnn', 'cuda', 'cpp']) + self.layer = _create_layer(engine, 'Dropout') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class Activation(Layer): + + def __init__(self, name, mode='relu', engine='cudnn', + input_sample_shape=None): + """Activation layers. 
+ + Args: + engine (string): 'cudnn' + name (string): layer name + mode (string): 'relu', 'sigmoid', or 'tanh' + input_sample_shape (tuple): shape of a single sample + """ + super(Activation, self).__init__(name) + _check_engine(engine, ['cudnn', 'cuda', 'cpp']) + mode_dict = {'relu': 'RELU', 'sigmoid': 'SIGMOID', 'tanh': 'TANH'} + self.conf.type = mode_dict[mode.lower()] + self.layer = _create_layer(engine, 'Activation') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class Softmax(Layer): + + def __init__(self, name, axis=1, engine='cudnn', input_sample_shape=None): + """Apply softmax. + + Args: + axis (int): reshape the input as a matrix with the dimension + [0,axis) as the row, the [axis, -1) as the column. + input_sample_shape (tuple): shape of a single sample + """ + super(Softmax, self).__init__(name) + # conf = self.conf.softmax_conf + # conf.axis = axis + _check_engine(engine, ['cudnn', 'cuda', 'cpp']) + self.layer = _create_layer(engine, 'Softmax') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +class Flatten(Layer): + + def __init__(self, name, axis=1, engine='cudnn', input_sample_shape=None): + """Reshape the input tensor into a matrix. + Args: + axis (int): reshape the input as a matrix with the dimension + [0,axis) as the row, the [axis, -1) as the column. + input_sample_shape (tuple): shape for a single sample + """ + super(Flatten, self).__init__(name) + conf = self.conf.flatten_conf + conf.axis = axis + _check_engine(engine, ['cudnn', 'cuda', 'cpp']) + if engine == 'cudnn': + engine = 'cuda' + self.layer = _create_layer(engine, 'Flatten') + if input_sample_shape is not None: + self.setup(input_sample_shape) + + +def _check_engine(engine, allowed_engines): + assert engine.lower() in Set(allowed_engines), \ + '%s is not a supported engine. Pls use one of %s' % \ + (engine, ', '.join(allowed_engines)) + + +def _create_layer(engine, layer): + if engine == 'cuda' or engine == 'cpp': + layer_type = layer + else: + layer_type = engine.title() + layer + return singa_wrap.CreateLayer(layer_type) + + +def _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad): + """Private function called by Convolution2D and Pooling2D.""" + if isinstance(kernel, tuple): + conf.kernel_h = kernel[0] + conf.kernel_w = kernel[1] + else: + conf.kernel_h = kernel + conf.kernel_w = kernel + if isinstance(stride, tuple): + conf.stride_h = stride[0] + conf.stride_w = stride[1] + else: + conf.stride_h = stride + conf.stride_w = stride + mode = border_mode.lower() + if pad is None: + # TODO(wangwei) check the border mode + if mode == 'same': + assert conf.kernel_h % 2 == 1 and conf.kernel_w % 2 == 1, \ + 'Must use odd kernel for mode="same", kernel is (%d, %d)' % ( + conf.kernel_h, conf.kernel_w) + pad = (conf.kernel_h / 2, conf.kernel_w / 2) + elif mode == 'valid': + pad = (0, 0) + else: + assert False, ('Unsupported border_mode: %s. ' + 'Please use {"valid", "same"}' % border_mode) + assert isinstance(pad, tuple), 'pad should be a tuple' + if isinstance(pad, tuple): + conf.pad_h = pad[0] + conf.pad_w = pad[1] + else: + conf.pad_h = pad + conf.pad_w = pad + return conf + + +def _construct_param_specs_from_dict(specs): + """Conver the param specs from a dict into ParamSpec protobuf object. 
+ + Args: + specs (dict): the fields inlcude + 'name' for parameter name + 'lr_mult' for learning rate multiplier; + 'decay_mult' for weight decay multiplier; + 'init' for init method, which could be 'gaussian', 'uniform', + 'xavier' and 'msra'; + 'std', 'mean', 'high', 'low' are used by corresponding init methods; + 'constraint' for gradient constraint, value is a float threshold for + clampping the gradient. + 'regularizer' for regularization, currently support 'l2', value is a + float for the coefficient. + + Returns: + a ParamSpec object + """ + conf = model_pb2.ParamSpec() + if 'name' in specs: + conf.name = specs['name'] + if 'lr_mult' in specs: + conf.lr_mult = specs['lr_mult'] + if 'decay_mult' in specs: + conf.decay_mult = specs['decay_mult'] + if 'init' in specs: + filler = conf.filler + filler.type = specs['init'].lower() + if specs['init'].lower() == 'uniform': + assert 'low' in specs and 'high' in specs, \ + 'low and high are required for "uniform" init method' + filler.low = specs['low'] + filler.high = specs['high'] + elif specs['init'].lower() == 'gaussian': + assert 'mean' in specs and 'std' in specs, \ + 'std and mean are required for "gaussian" init method' + filler.mean = specs['mean'] + filler.std = specs['std'] + elif specs['init'].lower() == 'constant' and 'value' in specs: + filler.value = specs['value'] + if 'regularizer' in specs: + conf.regularizer.coefficient = specs['regularizer'] + if 'constraint' in specs: + conf.constraint.threshold = specs['constraint'] + return conf + + +def get_layer_list(): + """ Return a list of strings reprensenting the all supported layers""" + return singa_wrap.GetRegisteredLayers() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/loss.py ---------------------------------------------------------------------- diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py new file mode 100644 index 0000000..acfb813 --- /dev/null +++ b/src/python/singa/loss.py @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +""" Python wrappers for optimizers implemented by C++.""" + +from . import singa_wrap as singa +import tensor + + +class Loss(object): + + def __init__(self): + self.swig_loss = None + + def forward(self, flag, x, y): + """Return a tensor of floats, one per sample""" + return tensor.from_raw_tensor( + self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor)) + + def backward(self): + """Return the grad of x w.r.t. 
the loss obj""" + return tensor.from_raw_tensor(self.swig_loss.Backward()) + + def evaluate(self, flag, x, y): + """Return the averaged loss for all samples in x""" + return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor) + + +class SoftmaxCrossEntropy(Loss): + + def __init__(self): + self.swig_loss = singa.SoftmaxCrossEntropy() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/metric.py ---------------------------------------------------------------------- diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py new file mode 100644 index 0000000..31b6892 --- /dev/null +++ b/src/python/singa/metric.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +""" Python wrappers for optimizers implemented by C++.""" + +from . import singa_wrap as singa +import tensor + + +class Metric(object): + + def __init__(self): + self.swig_metric = None + + def forward(self, x, y): + """Return a tensor of floats, one per sample""" + return tensor.from_raw_tensor( + self.swig_metric.Forward(x.singa_tensor, y.singa_tensor)) + + def evaluate(self, x, y): + """Return the averaged metric for all samples in x""" + return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor) + + +class Accuracy(Metric): + + def __init__(self): + self.swig_metric = singa.Accuracy() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/model.py ---------------------------------------------------------------------- diff --git a/src/python/singa/model.py b/src/python/singa/model.py new file mode 100644 index 0000000..38d9950 --- /dev/null +++ b/src/python/singa/model.py @@ -0,0 +1,21 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ + +class Model(Object): + pass + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/net.py ---------------------------------------------------------------------- diff --git a/src/python/singa/net.py b/src/python/singa/net.py new file mode 100644 index 0000000..084db4b --- /dev/null +++ b/src/python/singa/net.py @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +""" +Nerual net class for constructing the nets using layers and providing access +functions for net info, e.g., parameters. +""" + + +from .proto.model_pb2 import kTrain, kEval +import tensor +import cPickle as pickle + + +class FeedForwardNet(object): + + def __init__(self, loss=None, metric=None): + self.loss = loss + self.metric = metric + self.layers = [] + + def to_device(self, dev): + for lyr in self.layers: + lyr.to_device(dev) + + def add(self, lyr): + """Append a layer into the layer list. + + This function will get the sample shape from the last layer to setup + the newly added layer. For the first layer, it is setup outside. + The calling function should ensure the correctness of the layer order. 
+ + Args: + lyr (Layer): the layer to be added + """ + if len(self.layers) > 0 and lyr.has_setup is False: + shape = self.layers[-1].get_output_sample_shape() + print shape + lyr.setup(shape) + self.layers.append(lyr) + + def param_values(self): + values = [] + for lyr in self.layers: + values.extend(lyr.param_values()) + return values + + def param_specs(self): + specs = [] + for lyr in self.layers: + specs.extend(lyr.param_specs) + return specs + + def train(self, x, y): + out = self.forward(kTrain, x) + l = self.loss.forward(kTrain, out, y) + if self.metric is not None: + m = self.metric.evaluate(out, y) + return self.backward(), (l.l1(), m) + + def evaluate(self, x, y): + """Evaluate the loss and metric of the given data""" + out = self.forward(kEval, x) + l = None + m = None + assert self.loss is not None or self.metric is not None,\ + 'Cannot do evaluation, as neither loss nor metic is set' + if self.loss is not None: + l = self.loss.evaluate(kEval, out, y) + if self.metric is not None: + m = self.metric.evaluate(out, y) + return l, m + + def predict(self, x): + xx = self.forward(kEval, x) + return tensor.softmax(xx) + + def forward(self, flag, x): + for lyr in self.layers: + x = lyr.forward(flag, x) + # print lyr.name, x.l1() + return x + + def backward(self, flag=kTrain): + grad = self.loss.backward() + pgrads = [] + for lyr in reversed(self.layers): + grad, _pgrads = lyr.backward(flag, grad) + for g in reversed(_pgrads): + pgrads.append(g) + return reversed(pgrads) + + def save(self, f): + """Save model parameters using cpickle""" + params = {} + for (specs, val) in zip(self.param_specs(), self.param_values()): + val.to_host() + params[specs.name] = tensor.to_numpy(val) + with open(f, 'wb') as fd: + pickle.dump(params, fd) + + def load(self, f): + """Load model parameters using cpickle""" + with open(f, 'rb') as fd: + params = pickle.load(fd) + for (specs, val) in zip(self.param_specs(), self.param_values()): + val.copy_from_numpy(params[specs.name]) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/optimizer.py ---------------------------------------------------------------------- diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py new file mode 100644 index 0000000..503527f --- /dev/null +++ b/src/python/singa/optimizer.py @@ -0,0 +1,343 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +""" Python wrappers for optimizers implemented by C++.""" + +from . import singa_wrap as singa +import tensor +from proto import model_pb2 + + +class Optimizer(object): + """Base python optimizer. + + Usages: + 1. construct the optimizer + 2. (optional) register each parameter with its specs. + 3. 
use the optimizer to update parameter values given parameter + gradients and other optional info + """ + + def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None, + momentum_gen=None, regularizer=None, constraint=None): + """Constructor. + + Args: + lr: a constant or a function that generates learning rate given a + step, which is mutually exclusive with 'lr_gen'. + momentum: a constant or a function that generates the momentum value + given a step. + decay (float): the coefficent for L2 regularizer, which is mutually + exclusive with 'regularizer'. + lr_gen (function): a function returns the learning rate given + the current training step. It is mutually exclusive with lr. If + both are not set, the apply_with_lr function should be used for + param updating. + momentum_gen (function): a function returns the momentum value given + the current training step. It is mutually exclusive with + momentum. + regularizer: an instance of Regularizer or RegularizerConf; If set, + regularization would be applied in apply_with_lr(). + Users can also do regularization outside. + constraint: an instance of Constraint or ConstraintConf; If set, + constraint would be applied inside apply_with_lr(). Users can + also do regularization outside. + """ + if lr is not None: + assert lr_gen is None, 'Cannot set lr and lr_gen at the same time' + + def lr_gen(step): + return lr + self.lr_gen = lr_gen + if momentum is not None: + assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\ + ' at the same time' + + def momentum_gen(step): + return momentum + self.momentum_gen = momentum_gen + if decay is not None: + assert regularizer is None, \ + 'Cannot set decay and regularizer at the same time' + regularizer = L2Regularizer(decay) + if regularizer is not None: + if isinstance(regularizer, model_pb2.RegularizerConf): + self.regularizer = CppRegularizer(regularizer) + else: + self.regularizer = regularizer + else: + self.regularizer = None + if constraint is not None: + if isinstance(constraint, model_pb2.ConstraintConf): + self.constraint = CppConstraint(constraint) + else: + self.constraint = constraint + else: + self.constraint = None + self.regularizers = {} + self.constraints = {} + self.decay_multiplier = {} + self.learning_rate_multiplier = {} + + def register(self, name, specs): + """Register the param specs, including creating regularizer and + constraint per param object. Param specific regularizer and constraint + have higher priority than the global ones. + + Args: + name (str): parameter name + specs (ParamSpec): protobuf obj + """ + if specs.HasField('regularizer'): + self.regularizers[name] = CppRegularizer(specs.constraint) + if specs.HasField('constraint'): + self.constraints[name] = CppConstraint(specs.regularizer) + if specs.lr_mult != 1: + self.learning_rate_multiplier[name] = specs.lr_mult + if specs.decay_mult != 1: + self.decay_multiplier[name] = specs.decay_mult + + def apply_regularizer_constraint(self, value, grad, name=None, step=None): + """Apply regularization and constraint if available. + + If there are both global regularizer (constraint) and param specific + regularizer (constraint), it would use the param specific one. 
+ + Args: + value (Tensor): parameter value Tensor + grad (Tensor): parameter gradient Tensor + name (string): to get parameter specific regularizer or constraint + step (int): some regularizer or constraint would use step + + Return: + the updated gradient Tensor + """ + if name is not None and name in self.constraints: + self.constraints[name].apply(value, grad, step) + elif self.constraint is not None: + self.constraint.apply(step, value, grad) + + if name is not None and name in self.regularizers: + self.regularizers[name].apply(value, grad, step) + elif self.regularizer is not None: + self.regularizer.apply(step, value, grad) + return grad + + def apply_with_lr(self, step, lr, grad, value, name=None): + """Do update with given learning rate. + + The subclass optimizer must override this function. + Args: + step (int): training step (could be iteration or epoch) + lr (float): learning rate + grad (Tensor): parameter gradient + value (Tesnor): parameter value + name (string): paramter name to retrieval parameter specific + updating rules (including regularizer and constraint) + + Return: + updated parameter value + """ + assert False, 'This is the base function, pls call the subclass func' + return value + + def apply(self, step, grad, value, name=None): + """Do update assume the learning rate generator is set. + + The subclass optimizer does not need to override this function. + Args: + step (int): training step (could be iteration or epoch) + grad (Tensor): parameter gradient + value (Tesnor): parameter value + name (string): paramter name to retrieval parameter specific + updating rules (including regularizer and constraint) + + Return: + updated parameter value + """ + + assert self.lr_gen is not None, 'Learning rate generator is not set.'\ + 'Either set the lr_gen in constructor or call apply_with_lr' + lr = self.lr_gen(step) + return self.apply_with_lr(step, lr, grad, value, name) + + +class SGD(Optimizer): + + def __init__(self, lr=None, momentum=None, decay=None, **kwargs): + """The vallina Stochasitc Gradient Descent algorithm. + + See the base Optimizer for all arguments. + """ + super(SGD, self).__init__(lr, momentum, decay) + conf = model_pb2.OptimizerConf() + self.opt = singa.CreateOptimizer('SGD') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, step, lr, grad, value, name): + self.apply_regularizer_constraint(step, value, grad, name) + self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class Nesterov(Optimizer): + + def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs): + """The SGD with Nesterov momentum + + See the base Optimizer for all arguments. + """ + super(Nesterov, self).__init__(lr, momentum, decay, kwargs) + conf = model_pb2.OptimizerConf() + self.opt = singa.CreateOptimizer('Nesterov') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, step, lr, grad, value, name): + self.apply_regularizer_constraint(step, value, grad, name) + self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class AdaGrad(Optimizer): + + def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs): + """AdaGrad optimizer. + + See the base Optimizer for all constructor args. + Args: + epsilon (float): small number for preventing numeric error. 
+ """ + super(RMSProp, self).__init__(lr, decay, **kwargs) + conf = model_pb2.OptimizerConf() + conf.delta = epsilon + self.opt = singa.CreateOptimizer('AdaGrad') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, step, lr, grad, value, name): + grad = self.apply_regularizer_constraint(step, value, grad, name) + self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class RMSProp(Optimizer): + + def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs): + """RMSProp optimizer. + + See the base Optimizer for all constructor args. + Args: + rho (float): float within [0, 1] + epsilon (float): small value for preventing numeric error + """ + super(RMSProp, self).__init__(lr, decay, kwargs) + conf = model_pb2.OptimizerConf() + conf.rho = rho + conf.delta = epsilon + self.opt = singa.CreateOptimizer('RMSProp') + self.opt.Setup(conf.SerializeToString()) + + def apply_with_lr(self, step, lr, grad, value, name): + grad = self.apply_regularizer_constraint(step, value, grad, name) + self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor) + return value + + +class Regularizer(object): + """Base Python regularizer for parameter gradients. + """ + + def apply(self, value, grad): + assert False, 'Not Implemented. Call the subclass function.' + return grad + + +class CppRegularizer(Regularizer): + """Wrapper for regularizer implemented using C++. + """ + + def __init__(self, conf): + """Constructor. + + Args: + conf (RegularizerConf): protobuf message for the configuration. + """ + self.reg = singa.CreateRegularizer(conf.type) + self.reg.Setup(conf.SerializeToString()) + + def apply(self, step, value, grad): + self.reg.Apply(step, value.singa_tensor, grad.singa_tensor) + return grad + + +class L2Regularizer(Regularizer): + """L2 regularization""" + + def __init__(self, coefficient): + """ + Args: + coefficient (float): regularization coefficient. + """ + self.coefficient = coefficient + + def apply(self, step, value, grad, coefficient=None): + if coefficient is None: + assert self.coefficient is not None, 'Must set the coefficient' + coefficient = self.coefficient + tensor.axpy(coefficient, value, grad) + return grad + + +class Constraint(object): + """Base Python constraint class for paramter gradients. + """ + + def apply(self, step, value, grad): + return grad + + +class CppConstraint(Constraint): + """Wrapper for constraints implemented using C++. + """ + + def __init__(self, conf): + """Constructor. + + Args: + conf (ConstraintConf): protobuf message for the configuration. + """ + self.constraint = singa.CreateConstraint(conf.type) + self.constraint.Setup(conf.SerializeToString()) + + def apply(self, step, value, grad): + self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor) + return grad + + +class L2Constraint(Constraint): + """Rescale the gradient to make the L2 norm <= a given threshold. 
+ """ + + def __init__(self, threshold=None): + self.threshold = threshold + + def apply(self, step, value, grad, threshold=None): + if threshold is None: + assert self.threshold is not None, 'Must set the threshold' + threshold = self.threshold + nrm = grad.l2() + grad *= threshold / nrm + return grad http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/tensor.py ---------------------------------------------------------------------- diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py new file mode 100644 index 0000000..2d6fa5a --- /dev/null +++ b/src/python/singa/tensor.py @@ -0,0 +1,521 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ============================================================================= +""" +This script includes Tensor class and its methods for python users +to call singa::Tensor and its methods +""" + +import numpy as np +from .proto import core_pb2 +from . import singa_wrap as singa +from functools import reduce + + +class Tensor(object): + ''' Class and member functions for singa::Tensor + ''' + + def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32): + ''' shape = (tuple) + ''' + if shape is None: + # call constructor of singa::Tensor + self.singa_tensor = singa.Tensor() + return + else: + assert isinstance(shape, tuple), 'shape should be tuple' + vs = _tuple_to_vector(shape) + if device is None: + self.singa_tensor = singa.Tensor(vs, dtype) + else: + self.singa_tensor = singa.Tensor(vs, device, dtype) + self.shape = shape + self.device = device + self.dtype = dtype + + def copy_from_numpy(self, np_array, offset=0): + ''' this method stores the values of numpy array into tensor data + from the position of offset + ''' + assert np_array.size == self.size(), 'tensor shape should be the same' + if not np_array.ndim == 1: + np_array = np_array.flatten() + dt = np_array.dtype + if dt == np.float32: + self.singa_tensor.floatCopyDataFromHostPtr(np_array) + elif dt == np.int or dt == np.int32: + self.singa_tensor.intCopyDataFromHostPtr(np_array) + else: + print 'Not implemented yet for ', dt + + # deprecated, access the member data_type directly + def data_type(self): + return self.singa_tensor.data_type() + + # deprecated, access the member shape directly + def shape(self, axis=None): + if axis is None: + return self.singa_tensor.shape() + else: + return self.singa_tensor.shape(axis) + + def ndim(self): + return self.singa_tensor.nDim() + + def is_transpose(self): # TODO(wangwei) make transpose a member + return self.singa_tensor.transpose() + + def size(self): # TODO(wangwei) compute size + return self.singa_tensor.Size() + + def memsize(self): + return self.singa_tensor.MemSize() + + def reshape(self, shape): + assert product(self.shape) == product(shape), \ + 
'product of shape should be equal' + self.shape = shape + self.singa_tensor.Reshape(_tuple_to_vector(shape)) + + def reset_like(self, t): + self.singa_tensor.ResetLike(t.singa_tensor) + + def as_type(self, dtype): + self.singa_tensor.AsType(dtype) + + def to_device(self, device): + self.singa_tensor.ToDevice(device) + + def to_host(self): + self.singa_tensor.ToHost() + + def l2(self): + return self.singa_tensor.L2() + + def l1(self): + return self.singa_tensor.L1() + + def set_value(self, x): + if isinstance(x, float): + self.singa_tensor.floatSetValue(x) + + def copy_data(self, t): + self.singa_tensor.CopyData(t.singa_tensor) + + def clone(self): + ''' it does deep copy + call singa::Tensor::Clone() + ''' + return _call_singa_func(self.singa_tensor.Clone) + + def transpose(self): + ''' shallow copy, negate the transpose field + call singa::Tensor::T() + ''' + return _call_singa_func(self.singa_tensor.T) + + def copy(self): + ''' shallow copy + call copy constructor of singa::Tensor + ''' + return _call_singa_func(singa.Tensor, self.singa_tensor) + + def deepcopy(self): + ''' deep copy + call singa::Tensor::Clone() + ''' + return self.clone() + + def bernoulli(self, p): + singa.floatBernoulli(float(p), self.singa_tensor) + + def gaussian(self, mean, std): + singa.floatGaussian(float(mean), float(std), self.singa_tensor) + + def uniform(self, low, high): + singa.floatUniform(float(low), float(high), self.singa_tensor) + + def add_column(self, v): + singa.AddColumn(v.singa_tensor, self.singa_tensor) + + def add_row(self, v): + singa.AddRow(v.singa_tensor, self.singa_tensor) + + def div_column(self, v): + singa.DivColumn(v.singa_tensor, self.singa_tensor) + + def div_row(self, v): + singa.DivRow(v.singa_tensor, self.singa_tensor) + + def mult_column(self, v): + singa.MultColumn(v.singa_tensor, self.singa_tensor) + + def mult_row(self, v): + singa.MultRow(v.singa_tensor, self.singa_tensor) + + ''' + python operators (+=, -=, *=, /=) for singa::Tensor unary operators + ''' + + def __iadd__(self, x): + if isinstance(x, Tensor): + self.singa_tensor += x.singa_tensor + else: + self.singa_tensor += x + return self + + def __isub__(self, x): + if isinstance(x, Tensor): + self.singa_tensor -= x.singa_tensor + else: + self.singa_tensor -= x + return self + + def __imul__(self, x): + if isinstance(x, Tensor): + self.singa_tensor *= x.singa_tensor + else: + self.singa_tensor *= x + return self + + def __idiv__(self, x): + if isinstance(x, Tensor): + self.singa_tensor /= x.singa_tensor + else: + self.singa_tensor /= x + return self + + ''' + python operators (+, -, *, /, <, <=, >, >=) for singa binary operators + ''' + + def __add__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.Add_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.Add_Tf, + self.singa_tensor, rhs) + + def __sub__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.Sub_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.Sub_Tf, + self.singa_tensor, rhs) + + def __mul__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.EltwiseMul_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.EltwiseMul_Tf, + self.singa_tensor, rhs) + + def __div__(self, rhs): + if isinstance(rhs, Tensor): + return _call_singa_func(singa.Div_TT, + self.singa_tensor, rhs.singa_tensor) + else: + return _call_singa_func(singa.Div_Tf, + self.singa_tensor, rhs) + + def __lt__(self, rhs): + return 
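
The in-place and binary operators above map directly onto the singa element-wise routines; a brief, illustrative sketch:

    from singa import tensor

    a = tensor.Tensor((2, 2))
    a.uniform(-1.0, 1.0)        # fill in place
    b = a.clone()               # deep copy via singa::Tensor::Clone()

    a += b                      # in-place, Tensor.__iadd__
    c = a * 0.5                 # new Tensor via EltwiseMul_Tf
    d = c - b                   # new Tensor via Sub_TT
    print(d.l2())               # wraps singa::Tensor::L2()
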
_call_singa_func(singa.LT_Tf, self.singa_tensor, rhs) + + def __le__(self, rhs): + return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs) + + def __gt__(self, rhs): + return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs) + + def __ge__(self, rhs): + return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs) + + +''' python functions for global functions in Tensor.h +''' + + +def from_raw_tensor(t): + x = Tensor(t.shape(), t.device(), t.data_type()) + x.singa_tensor = t + return x + + +def from_raw_tensors(tt): + ret = [] + for t in list(tt): + ret.append(from_raw_tensor(t)) + return ret + + +def product(shape): + return reduce(lambda x, y: x * y, shape) + + +def sizeof(dtype): + return singa.SizeOf(dtype) + + +def reshape(t, s): + return _call_singa_func(singa.Reshape, t.singa_tensor, s) + + +def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0): + singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size, + dst_offset, src_offset) + + +def from_numpy(np_array): + ret = Tensor(np_array.shape) + ret.copy_from_numpy(np_array) + return ret + + +def to_numpy(t): + ''' this method gets the values of tensor data and + returns it as numpy array + TODO(wangwei) clone t to host + ''' + if t.dtype == core_pb2.kFloat32: + np_array = t.singa_tensor.floatGetValue(int(t.size())) + elif t.dtype == core_pb2.kInt: + np_array = t.singa_tensor.intGetValue(int(t.size())) + else: + print 'Not implemented yet for ', t.dtype + return np_array.reshape(t.shape) + + +def abs(t): + return _call_singa_func(singa.Abs, t.singa_tensor) + + +def exp(t): + return _call_singa_func(singa.Exp, t.singa_tensor) + + +def log(t): + return _call_singa_func(singa.Log, t.singa_tensor) + + +def relu(t): + return _call_singa_func(singa.ReLU, t.singa_tensor) + + +def sigmoid(t): + return _call_singa_func(singa.Sigmoid, t.singa_tensor) + + +def square(t): + return _call_singa_func(singa.Square, t.singa_tensor) + + +def tanh(t): + return _call_singa_func(singa.Tanh, t.singa_tensor) + + +def sum(t, axis=None): + if axis is None: + return singa.floatSum(t.singa_tensor) + else: + return _call_singa_func(singa.Sum, t.singa_tensor, axis) + + +def pow(t, x, out=None): + if out is None: + if isinstance(x, Tensor): + return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor) + else: + return _call_singa_func(singa.Pow_f, t.singa_tensor, x) + else: + if isinstance(x, Tensor): + singa.Pow(t.singa_tensor, x.singa_tensor, out.singa_tensor) + else: + singa.Pow_f_out(t.singa_tensor, x, out.singa_tensor) + return out + + +def average(t, axis=0): + if t.ndim() > 1: + return _call_singa_func(singa.Average, t.singa_tensor, axis) + else: + return singa.floatSum(t.singa_tensor) / t.size() + + +def softmax(t, out=None): + if out is None: + return _call_singa_func(singa.SoftMax, t.singa_tensor) + else: + singa.SoftMax(t.singa_tensor, out.singa_tensor) + return out + + +def lt(t, x): + return t < x + + +def le(t, x): + return t <= x + + +def gt(t, x): + return t > x + + +def ge(t, x): + return t >= x + + +def add(lhs, rhs, ret=None): + if ret is None: + # call Tensor.__add__() + return lhs + rhs + else: + if isinstance(rhs, Tensor): + singa.Add(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + else: + singa.Add_Tf_out(lhs.singa_tensor, rhs, ret.singa_tensor) + return ret + + +def sub(lhs, rhs, ret=None): + if ret is None: + # call Tensor.__sub__() + return lhs - rhs + else: + if isinstance(rhs, Tensor): + singa.Sub(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + else: + singa.Sub_Tf_out(lhs.singa_tensor, 
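
The module-level helpers above make round-tripping with numpy straightforward. A sketch, assuming CPU tensors (to_numpy does not yet clone to host, per the TODO above):

    import numpy as np
    from singa import tensor

    x = tensor.from_numpy(np.array([[-1.0, 2.0], [0.5, -0.25]], dtype=np.float32))
    y = tensor.relu(x)            # element-wise, returns a new Tensor
    total = tensor.sum(y)         # float: sum over all elements when axis is None
    print(total)
    print(tensor.to_numpy(y))     # back to numpy, original shape restored
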
rhs, ret.singa_tensor) + return ret + + +def eltwise_mult(lhs, rhs, ret=None): + if ret is None: + # call Tensor.__mul__() + return lhs * rhs + else: + if isinstance(rhs, Tensor): + singa.EltwiseMult(lhs.singa_tensor, rhs.singa_tensor, + ret.singa_tensor) + else: + singa.EltwiseMult_Tf_out(lhs.singa_tensor, rhs, + ret.singa_tensor) + return ret + + +def mult(A, B, C=None, alpha=1.0, beta=0.0): + ''' + This function returns C = alpha * A * B + beta * C + ''' + if C is None: + return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor) + else: + singa.floatMult(alpha, A.singa_tensor, B.singa_tensor, + beta, C.singa_tensor) + return C + + +def div(lhs, rhs, ret=None): + if ret is None: + # call Tensor.__div__() + return lhs / rhs + else: + if isinstance(rhs, Tensor): + singa.Div(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor) + else: + singa.Div_Tf_out(lhs.singa_tensor, rhs, ret.singa_tensor) + return ret + + +def axpy(alpha, x, y): + if isinstance(alpha, float): + singa.floatAxpy(alpha, x.singa_tensor, y.singa_tensor) + return y + + +def bernoulli(p, t): + if isinstance(p, float): + singa.floatBernoulli(p, t.singa_tensor) + return t + + +def gaussian(mean, std, t): + if isinstance(mean, float): + singa.floatGaussian(mean, std, t.singa_tensor) + return t + + +def uniform(low, high, t): + if isinstance(low, float): + singa.floatUniform(low, high, t.singa_tensor) + return t + + +def add_column(alpha, v, beta, M): + singa.floatAddColumn(alpha, beta, v.singa_tensor, M.singa_tensor) + return M + + +def add_row(alpha, v, beta, M): + singa.floatAddRow(alpha, beta, v.singa_tensor, M.singa_tensor) + return M + + +def sum_columns(M): + assert M.ndim() == 2, 'M.nDim() is supposed to be 2' + nb_col = M.shape(0) + ret = Tensor((nb_col, 1)) + singa.SumColumns(M.singa_tensor, ret.singa_tensor) + return ret + + +def sum_rows(M): + assert M.ndim() == 2, 'M.nDim() is supposed to be 2' + nb_row = M.shape(1) + ret = Tensor((1, nb_row)) + singa.SumRows(M.singa_tensor, ret.singa_tensor) + return ret + + +''' private functions, internally used +''' + + +def _tuple_to_vector(tshape): + ''' this function converts tuple to std::vector<int> + ''' + vs = singa.Shape(len(tshape)) + for i in range(len(tshape)): + vs[i] = tshape[i] + return vs + + +def _call_singa_func(_singa_func, *args): + ''' this function calls singa global functions that returns Tensor + and create new python Tensor instance + e.g., Tensor [singa_func](args...) + ''' + new_t = Tensor() + new_t.singa_tensor = _singa_func(*args) + new_t.shape = new_t.singa_tensor.shape() + new_t.device = new_t.singa_tensor.device() + new_t.dtype = new_t.singa_tensor.data_type() + return new_t http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/singa/utils.py ---------------------------------------------------------------------- diff --git a/src/python/singa/utils.py b/src/python/singa/utils.py new file mode 100644 index 0000000..a192cff --- /dev/null +++ b/src/python/singa/utils.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
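
mult() and axpy() above follow the usual BLAS-style conventions (C = alpha*A*B + beta*C and y += alpha*x). An illustrative sketch:

    import numpy as np
    from singa import tensor

    A = tensor.from_numpy(np.ones((2, 3), dtype=np.float32))
    B = tensor.from_numpy(np.ones((3, 4), dtype=np.float32))

    C = tensor.mult(A, B)                        # new 2x4 Tensor, C = A * B
    D = tensor.from_numpy(np.zeros((2, 4), dtype=np.float32))
    tensor.mult(A, B, D, alpha=2.0, beta=1.0)    # D = 2*A*B + 1*D, written in place
    tensor.axpy(0.1, C, D)                       # D += 0.1 * C
    print(tensor.to_numpy(D))
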
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import sys + + +def update_progress(progress, info): + """Display progress bar and user info. + + Args: + progress (float): progress [0, 1], negative for halt, and >=1 for done. + info (str): a string for user provided info to be displayed. + """ + barLength = 20 # bar length + status = "" + if isinstance(progress, int): + progress = float(progress) + if not isinstance(progress, float): + progress = 0 + status = "error: progress var must be float. " + if progress < 0: + progress = 0 + status = "Halt. " + if progress >= 1: + progress = 1 + status = "Done. " + status = status + info + block = int(round(barLength*progress)) + text = "[{0}] {1:3.1f}% {2}".format("."*block + " "*(barLength-block), + progress*100, status) + sys.stdout.write(text) + sys.stdout.write('\b'*(9 + barLength + len(status))) + sys.stdout.flush() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/790b7b4c/src/python/swig/core_tensor.i ---------------------------------------------------------------------- diff --git a/src/python/swig/core_tensor.i b/src/python/swig/core_tensor.i index f723d1b..c4ee610 100644 --- a/src/python/swig/core_tensor.i +++ b/src/python/swig/core_tensor.i @@ -24,12 +24,15 @@ %module core_tensor %include "std_vector.i" %include "std_string.i" +%include "std_shared_ptr.i" +/* %include "carrays.i" %array_class(float, floatArray); %array_class(int, intArray); %array_class(char, charArray); %array_class(double, doubleArray); +*/ %{ #define SWIG_FILE_WITH_INIT @@ -39,6 +42,7 @@ #include "singa/proto/model.pb.h" using singa::DataType; %} +%shared_ptr(singa::Device) %include "numpy.i" %init %{ @@ -47,9 +51,15 @@ using singa::DataType; %apply (float *IN_ARRAY1, int DIM1) { (const float *src, const size_t num) } +%apply (int *IN_ARRAY1, int DIM1) { + (const int *src, const size_t num) +} %apply (float *ARGOUT_ARRAY1, int DIM1) { (float *value, const size_t num) } +%apply (int *ARGOUT_ARRAY1, int DIM1) { + (int *value, const size_t num) +} %template(Shape) std::vector<size_t>; @@ -74,18 +84,18 @@ namespace singa{ std::shared_ptr<singa::Device> dev, DataType dtype = kFloat32); Tensor(const Tensor &from); - //Blob *blob() const; std::shared_ptr<singa::Device> device() const; - - template <typename DType> DType data() const; - %template(floatData) data<const float*>; - %template(intData) data<const int*>; - %template(charData) data<const char*>; - %template(doubleData) data<const double*>; +/* + template <typename DType> const DType* data() const; + %template(floatData) data<float>; + %template(intData) data<int>; + %template(charData) data<char>; + %template(doubleData) data<double>; + */ template <typename SType> void GetValue(SType* value, const size_t num); %template(floatGetValue) GetValue<float>; - //void ToArray(float *value, const size_t num); + %template(intGetValue) GetValue<int>; const DataType data_type() const; const std::vector<size_t> &shape() const; @@ -99,7 +109,8 @@ namespace singa{ void AsType(DataType type); void ToDevice(std::shared_ptr<singa::Device> dev); void ToHost(); - float L2(); + float 
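
A minimal sketch of driving update_progress() from a loop; the loss values are made up for illustration:

    import time
    from singa import utils

    num_batches = 50
    for b in range(num_batches):
        time.sleep(0.01)                          # stand-in for one training step
        utils.update_progress((b + 1.0) / num_batches,
                              'batch %d, loss = %.3f' % (b, 1.0 / (b + 1)))
    print('')                                     # move past the progress bar
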
L2() const; + float L1() const; template <typename SType> void SetValue(const SType x); %template(floatSetValue) SetValue<float>; @@ -108,8 +119,9 @@ namespace singa{ template <typename DType> void CopyDataFromHostPtr(const DType *src, const size_t num, - const size_t offset); + const size_t offset = 0); %template(floatCopyDataFromHostPtr) CopyDataFromHostPtr<float>; + %template(intCopyDataFromHostPtr) CopyDataFromHostPtr<int>; // --- other types void CopyData(const Tensor &other); @@ -173,6 +185,7 @@ namespace singa{ /* TODO(chonho-02) need to implement the average of all elements ??? */ Tensor Average(const Tensor &t, int axis); + Tensor SoftMax(const Tensor &t); Tensor Pow(const Tensor &base, const Tensor &exp);
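
The added int typemaps (intGetValue, intCopyDataFromHostPtr) pair with the kInt branches in tensor.py; a hypothetical sketch of an integer label tensor (whether every C++ kernel accepts kInt is not shown in this patch):

    import numpy as np
    from singa import tensor
    from singa.proto import core_pb2

    # Hypothetical use of the new int typemaps: an integer label tensor.
    labels = tensor.Tensor((4,), dtype=core_pb2.kInt)
    labels.copy_from_numpy(np.array([0, 2, 1, 3], dtype=np.int32))  # intCopyDataFromHostPtr
    print(tensor.to_numpy(labels))   # kInt tensors are read back through intGetValue
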
