http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/singa.i
----------------------------------------------------------------------
diff --git a/src/api/singa.i b/src/api/singa.i
new file mode 100644
index 0000000..12f46f3
--- /dev/null
+++ b/src/api/singa.i
@@ -0,0 +1,31 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+/*interface file for swig */
+
+%module singa_wrap
+%include "config.i"
+%include "core_tensor.i"
+%include "core_device.i"
+%include "model_layer.i"
+%include "model_optimizer.i"
+%include "model_loss.i"
+%include "model_metric.i"
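
The interface files included above generate the singa_wrap extension module that the pure-Python package backs onto: the device, tensor, layer, loss, metric and optimizer wrappers in the Python modules deleted below all call into it. As a minimal, hedged sketch of that usage — mirroring the docstring example in the removed src/python/singa/layer.py, and assuming the package is installed as singa, the protos live under singa.proto (per the setup.py packages list), and a cuDNN-enabled GPU is available — a single convolution layer can be exercised roughly like this:

    from singa import device, layer, tensor
    from singa.proto.model_pb2 import kTrain   # proto import path assumed from setup.py

    layer.engine = 'cudnn'                 # select the cudnn layer implementations
    dev = device.create_cuda_gpu()         # swig-wrapped CudaGPU (see core_device.i)

    conv = layer.Conv2D('conv', 32, 3, 1, pad=1,
                        input_sample_shape=(3, 32, 32))
    conv.to_device(dev)                    # move the layer parameters onto the GPU

    x = tensor.Tensor((3, 32, 32), dev)
    x.uniform(-1, 1)
    y = conv.forward(kTrain, x)            # forward pass

    dy = tensor.Tensor()
    dy.reset_like(y)
    dy.set_value(0.1)
    dx, dparams = conv.backward(kTrain, dy)  # gradients w.r.t. the input and the parameters

The same pattern applies to the other wrappers declared in singa.i, e.g. loss.SoftmaxCrossEntropy and metric.Accuracy in the removed loss.py and metric.py modules shown later in this diff.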
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/setup.py.in ---------------------------------------------------------------------- diff --git a/src/python/setup.py.in b/src/python/setup.py.in deleted file mode 100644 index 881cd30..0000000 --- a/src/python/setup.py.in +++ /dev/null @@ -1,98 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Always prefer setuptools over distutils -from setuptools import setup - - -setup( - name='singa', - - version='${PACKAGE_VERSION}', - - description='A General Deep Learning System', - - url='https://github.com/apache/incubator-singa', - - author='Apache SINGA (incubating)', - author_email='[email protected]', - - license='Apache 2', - - classifiers=[ - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - - 'Intended Audience :: Developers', - 'Topic :: Deep Learning System ', - - 'License :: Apache License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - ], - - keywords='deep learning singa apache', - - packages= ['singa', 'singa.proto'], - - #py_modules=["singa"], - - install_requires=[ - 'numpy>=1.11.0', - 'protobuf>=2.5.0,<3' - ], - - #List additional groups of dependencies here (e.g. development - #dependencies). You can install these using the following syntax, - #for example: - #$ pip install -e .[dev,test] - #extras_require={ - # 'dev': ['check-manifest'], - # 'test': ['coverage'], - #}, - - #If there are data files included in your packages that need to be - #installed, specify them here. If using Python 2.6 or less, then these - #have to be included in MANIFEST.in as well. - - package_data={ - 'singa': ['_singa_wrap.so'], - }, - - #Although 'package_data' is the preferred approach, in some case you may - #need to place data files outside of your packages. See: - #http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa - #In this case, 'data_file' will be installed into '<sys.prefix>/my_data' - #data_files=[('my_data', ['data/data_file'])], - - #To provide executable scripts, use entry points in preference to the - #"scripts" keyword. Entry points provide cross-platform support and allow - #pip to create the appropriate form of executable for the target platform. 
- - entry_points={ - 'console_scripts': [ - 'singa=singa.command:main', - ], - }, -) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/__init__.py ---------------------------------------------------------------------- diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py deleted file mode 100644 index c81c6ef..0000000 --- a/src/python/singa/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/command.py ---------------------------------------------------------------------- diff --git a/src/python/singa/command.py b/src/python/singa/command.py deleted file mode 100644 index f14c8c5..0000000 --- a/src/python/singa/command.py +++ /dev/null @@ -1,240 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# ============================================================================= - -''' -This script is the main entrance for user to run singa inside a model workspace - -To use this script, user sudo install these dependencies: flask pillow and protobuf -''' - -import sys, glob, os, random, shutil, time -from flask import Flask, request, redirect, url_for -import numpy as np -import ConfigParser -import urllib, traceback - - -from argparse import ArgumentParser -from argparse import RawDescriptionHelpFormatter -sys.path.append(os.getcwd()) - -__all__ = [] -__version__ = 0.1 -__date__ = '2016-07-20' -__updated__ = '2016-07-20' -__shortdesc__ = ''' -welcome to singa -''' - -app = Flask(__name__) -config = ConfigParser.RawConfigParser() -service = {} -data_path = "data_" -parameter_path = "parameter_" - -debug = False - -class CLIError(Exception): - '''Generic exception to raise and log different fatal errors.''' - def __init__(self, msg): - super(CLIError).__init__(type(self)) - self.msg = "E: %s" % msg - def __str__(self): - return self.msg - def __unicode__(self): - return self.msg - -def main(argv=None): # IGNORE:C0111 - '''Command line options.''' - - from . import device - - if argv is None: - argv = sys.argv - else: - sys.argv.extend(argv) - - program_name = os.path.basename(sys.argv[0]) - program_version = "v%s" % __version__ - program_build_date = str(__updated__) - program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date) - program_shortdesc = __shortdesc__ - program_license = '''%s - - Created by dbsystem group on %s. - Copyright 2016 NUS School of Computing. All rights reserved. - - Licensed under the Apache License 2.0 - http://www.apache.org/licenses/LICENSE-2.0 - - Distributed on an "AS IS" basis without warranties - or conditions of any kind, either express or implied. 
- -USAGE -''' % (program_shortdesc, str(__date__)) - - global debug - - try: - # Setup argument parser - parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter) - parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000") - parser.add_argument("-param", "--parameter", dest="parameter", help="the parameter file path to be loaded") - parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug") - parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data") - parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu") - parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa") - parser.add_argument('-V', '--version', action='version', version=program_version_message) - - # Process arguments - args = parser.parse_args() - - port = args.port - parameter_file = args.parameter - mode = args.mode - need_reload = args.reload_data - use_cpu = args.use_cpu - debug = args.debug - - #prepare data files - config.read('file.cfg') - file_prepare(need_reload) - - - import network as net - model = net.create() - - #load parameter - parameter_file=get_parameter(parameter_file) - - if parameter_file: - print "load parameter file: %s" % parameter_file - model.load(parameter_file) - - if use_cpu: - raise CLIError("Currently cpu is not support!") - else: - print "runing with gpu" - d = device.create_cuda_gpu() - - model.to_device(d) - - if mode == "serve": - print "runing singa in serve mode, listen to port: %s " % port - global service - from serve import Service - service =Service(model,d) - - app.debug = debug - app.run(host='0.0.0.0', port= port) - elif mode == "train": - print "runing singa in train mode" - global trainer - from train import Trainer - trainer= Trainer(model,d) - if not parameter_file: - trainer.initialize() - trainer.train() - else: - raise CLIError("Currently only serve mode is surpported!") - return 0 - except KeyboardInterrupt: - ### handle keyboard interrupt ### - return 0 - except Exception, e: - if debug: - traceback.print_exc() - raise(e) - indent = len(program_name) * " " - sys.stderr.write(program_name + ": " + str(e) + "\n") - sys.stderr.write(indent + " for help use --help \n\n") - return 2 - -def file_prepare(reload_data=False): - ''' - download all files and generate data.py - ''' - if not reload_data and os.path.exists("data_.py"): - return - - print "download file" - #clean data - shutil.rmtree("data_.py",ignore_errors=True) - shutil.rmtree("data_",ignore_errors=True) - - data_py=open("data_.py",'w') - data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n") - if config.has_section("data"): - file_list = config.items("data") - #download files - for f in file_list: - name,path=download_file(f[0],f[1],data_path) - data_py.write("%s=\"%s\"\n" % (name,path)) - - data_py.flush() - data_py.close() - - if config.has_section("parameter"): - parameter_list = config.items("parameter") - for p in parameter_list: - download_file(p[0],p[1],parameter_path) - -def download_file(name,path,dest): - ''' - download one file to dest - ''' - if not os.path.exists(dest): - os.makedirs(dest) - if (path.startswith('http')): - file_name = path.split('/')[-1] - target = os.path.join(dest,file_name) - urllib.urlretrieve(path,target) - return 
name,target - - -def get_parameter(file_name=None): - ''' - get the paticular file name or get the last parameter file - ''' - if not os.path.exists(parameter_path): - os.makedirs(parameter_path) - return - - if file_name: - return os.path.join(parameter_path,file_name) - - parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)] - if len(parameter_list)==0: - return - parameter_list.sort() - - return parameter_list[-1] - [email protected]("/") -def index(): - return "Hello SINGA User!" - [email protected]('/predict', methods=['POST']) -def predict(): - if request.method == 'POST': - try: - response=service.serve(request) - except Exception as e: - return e - return response - return "error, should be post request" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/device.py ---------------------------------------------------------------------- diff --git a/src/python/singa/device.py b/src/python/singa/device.py deleted file mode 100644 index 2d93823..0000000 --- a/src/python/singa/device.py +++ /dev/null @@ -1,123 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ============================================================================= -''' -This script includes Device class and its subclasses for python users -to call singa::Device and its methods. - -TODO(wangwei) implement py CudaGPU class. -''' - -from . import singa_wrap as singa - - -class Device(object): - """ Class and member functions for singa::Device. - - Create Device instances using the CreateXXXDevice. - """ - - def __init__(self, id, device): - """Device constructor given device ID. - - Args: - id (int): device ID. - device: swig shared_ptr<Device> - """ - self.id = id - self.singa_device = device - - def set_rand_seed(self, seed): - self.singa_device.SetRandSeed(seed) - - def get_host(self): - return self.singa_device.host() - - def get_id(self): - return self.singa_device.id() - - -def get_num_gpus(): - return singa.Platform.GetNumGPUs() - - -def get_gpu_ids(): - return singa.Platform.GetGPUIDs() - - -def get_gpu_mem_size(id): - return singa.Platform.GetGPUMemSize(id) - - -def device_query(id, verbose=False): - return singa.Platform.DeviceQuery(id, verbose) - - -def create_cuda_gpus(num): - '''Create a list of CudaGPU devices. - - Args: - num (int): number of device to create. - Returns: - a list of swig converted CudaGPU devices. - ''' - - return singa.Platform.CreateCudaGPUs(num) - - -def create_cuda_gpu(): - '''Create a single CudaGPU device. - - Returns: - a swig converted CudaGPU device. - ''' - - return singa.Platform.CreateCudaGPUs(1)[0] - - -def create_cuda_gpus_on(device_ids): - '''Create a list of CudaGPU devices. - - Args: - device_ids (list): a list of GPU card IDs. 
- - Returns: - a list of swig converted CudaGPU devices. - ''' - return singa.Platform.CreateCudaGPUsOn(device_ids) - - -def create_cuda_gpu_on(device_id): - '''Create a CudaGPU device on the given device ID. - - Args: - device_id (int): GPU card ID. - - Returns: - a swig converted CudaGPU device. - ''' - devices = create_cuda_gpus_on([device_id]) - return devices[0] - - -default_device = singa.Platform.GetDefaultDevice() - - -def get_default_device(): - '''Get the default host device which is a CppCPU device''' - return default_device - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/initializer.py ---------------------------------------------------------------------- diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py deleted file mode 100644 index fb99663..0000000 --- a/src/python/singa/initializer.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ============================================================================= -'''Popular initialization methods for parameter values (Tensor objects). - -Example usages:: - - from singa import tensor - from singa import initializer - - x = tensor.Tensor((3, 5)) - initializer.uniform(x, 3, 5) # use both fan_in and fan_out - initializer.uniform(x, 3, 0) # use only fan_in -''' - -import math - - -def uniform(t, fan_in=0, fan_out=0): - '''Initialize the values of the input tensor following a uniform - distribution with specific bounds. - - Args: - fan_in(int): for the weight Tensor of a convolution layer, - fan_in = nb_channel * kh * kw; for dense layer, - fan_in = input_feature_length - fan_out(int): for the convolution layer weight Tensor, - fan_out = nb_filter * kh * kw; for the weight Tensor of a dense - layer, fan_out = output_feature_length - - Ref: [Bengio and Glorot 2010]: Understanding the difficulty of - training deep feedforward neuralnetworks. - - ''' - assert fan_in > 0 or fan_out > 0, \ - 'fan_in and fan_out cannot be 0 at the same time' - avg = 2 - if fan_in * fan_out == 0: - avg = 1 - x = math.sqrt(3.0 * avg / (fan_in + fan_out)) - t.uniform(-x, x) - - -def gaussian(t, fan_in=0, fan_out=0): - '''Initialize the values of the input tensor following a Gaussian - distribution with specific std. 
- - Args: - fan_in(int): for the weight Tensor of a convolution layer, - fan_in = nb_channel * kh * kw; for dense layer, - fan_in = input_feature_length - fan_out(int): for the convolution layer weight Tensor, - fan_out = nb_filter * kh * kw; for the weight Tensor of a dense - layer, fan_out = output_feature_length - - Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into - Rectifiers: Surpassing Human-Level Performance on ImageNet Classification - ''' - assert fan_in > 0 or fan_out > 0, \ - 'fan_in and fan_out cannot be 0 at the same time' - avg = 2 - if fan_in * fan_out == 0: - avg = 1 - std = math.sqrt(2.0 * avg / (fan_in + fan_out)) - t.gaussian(0, std) - - -def xavier(t): - '''Initialize the matrix parameter follow a Uniform distribution from - [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))]. - - Deprecated. Please use uniform() - - Args: - t (Tensor): the parater tensor - ''' - - scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1])) - t.uniform(-scale, scale) - - -def glorot(t): - '''Initialize the matrix parameter follow a Gaussian distribution with - mean = 0 and std = sqrt(2.0 / (nb_row + nb_col)) - - Deprecated. Please use gaussian() - - Args: - t (Tensor): the parater tensor - ''' - scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1])) - t.gaussian(0, 1) - t *= scale - - -def msra(t): - '''Initialize the matrix parameter follow a Guassian distribution with - mean = 0, std = math.sqrt(2.0 / nb_row). - - Deprecated. Please use gaussian() - - Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU - nonlinearities. - - Args: - t (Tensor): the parater tensor - ''' - t.gaussian(0, math.sqrt(2.0 / t.shape[0])) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/layer.py ---------------------------------------------------------------------- diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py deleted file mode 100644 index f22b3d1..0000000 --- a/src/python/singa/layer.py +++ /dev/null @@ -1,933 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -""" Python layers wrap the C++ layers to provide simpler construction APIs. 
- -Example usages:: - - from singa import layer - from singa import tensor - from singa import device - from singa.model_pb2 import kTrain - - layer.engine = 'cudnn' # to use cudnn layers - dev = device.create_cuda_gpu() - - # create a convolution layer - conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32)) - conv.to_device(dev) # move the layer data onto a CudaGPU device - x = tensor.Tensor((3, 32, 32), dev) - x.uniform(-1, 1) - y = conv.foward(kTrain, x) - - dy = tensor.Tensor() - dy.reset_like(y) - dy.set_value(0.1) - # dp is a list of tensors for parameter gradients - dx, dp = conv.backward(kTrain, dy) -""" - -from sets import Set -from . import singa_wrap -from .proto import model_pb2 -import tensor - - -engine = 'cudnn' -'''engine is the prefix of layer identifier. - -The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for -layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively. -For example, CudnnConvolution layer is identified by 'cudnn_convolution'; -'singacpp_convolution' is for Convolution layer; -Some layers' implementation use only Tensor functions, thererfore they are -transparent to the underlying devices. For threse layers, they would have -multiple identifiers, e.g., singacpp_dropout, singacuda_dropout and -singacl_dropout are all for the Dropout layer. In addition, it has an extra -identifier 'singa', i.e. 'singa_dropout' also stands for the Dropout layer. - -engine is case insensitive. Each python layer would create the correct specific -layer using the engine attribute. -''' - - -class Layer(object): - '''Base Python layer class. - - Typically, the life cycle of a layer instance includes: - 1. construct layer without input_sample_shapes, goto 2; - construct layer with input_sample_shapes, goto 3; - 2. call setup to create the parameters and setup other meta fields - 3. call forward or access layer members - 4. call backward and get parameters for update - - Args: - name (str): layer name - ''' - - def __init__(self, name, **kwargs): - self.layer = None # layer converted by swig - self.name = name # TODO(wangwei) duplicate with self.conf.name - self.conf = model_pb2.LayerConf() - self.conf.name = name - self.param_specs = [] - self.has_setup = False - - def param_names(self): - ''' - Returns: - a list of strings, one for the name of one parameter Tensor - ''' - names = [] - for x in self.param_specs: - names.append(x['name']) - return names - - def setup(self, in_shapes): - '''Call the C++ setup function to create params and set some meta data. - - Args: - in_shapes: if the layer accepts a single input Tensor, in_shapes is - a single tuple specifying the inpute Tensor shape; if the layer - accepts multiple input Tensor (e.g., the concatenation layer), - in_shapes is a tuple of tuples, each for one input Tensor - ''' - if self.has_setup: - return - self.layer.Setup(list(in_shapes), - self.conf.SerializeToString()) - self.has_setup = True - - def get_output_sample_shape(self): - '''Called after setup to get the shape of the output sample(s). - - Returns: - a tuple for a single output Tensor or a list of tuples if this layer - has multiple outputs - ''' - assert self.has_setup, \ - 'Must call setup() before get_output_sample_shape()' - return self.layer.GetOutputSampleShape() - - def param_values(self): - '''Return param value tensors. 
- - Parameter tensors are not stored as layer members because cpp Tensor - could be moved onto diff devices due to the change of layer device, - which would result in inconsistency. - - Returns: - a list of tensors, one for each paramter - ''' - if self.layer is None: - return [] - else: - return tensor.from_raw_tensors(self.layer.param_values()) - - def forward(self, flag, x): - '''Forward propagate through this layer. - - Args: - flag (int): kTrain or kEval - x (Tensor or list<Tensor>): an input tensor if the layer is - connected from a single layer; a list of tensors if the layer - is connected from multiple layers. - - Return: - a tensor if the layer is connected to a single layer; a list of - tensors if the layer is connected to multiple layers; - ''' - assert self.has_setup, 'Must call setup() before forward()' - if type(x) == list: - xs = [] - for t in x: - x.append(t.singa_tensor) - else: - assert isinstance(x, tensor.Tensor), \ - 'input must be a Tensor or a list of Tensor' - xs = x.singa_tensor - y = self.layer.Forward(flag, xs) - if type(y) == list: - return tensor.from_raw_tensors(y) - else: - return tensor.from_raw_tensor(y) - - def backward(self, flag, dy): - '''Backward propagate gradients through this layer. - - Args: - flag (int): for future use. - dy (Tensor or list<Tensor>): the gradient tensor(s) y w.r.t the - objective loss - Return: - <dx, <dp1, dp2..>>, dx is a (set of) tensor(s) for the gradient of x - , dpi is the gradient of the i-th parameter - ''' - if type(dy) == list: - dys = [] - for t in dy: - dys.append(t.singa_tensor) - else: - assert isinstance(dy, tensor.Tensor), \ - 'the input must be a Tensor or a set of Tensor' - dys = dy.singa_tensor - ret = self.layer.Backward(flag, dys) - if type(ret[0]) == list: - dxs = tensor.from_raw_tensors(ret[0]) - else: - dxs = tensor.from_raw_tensor(ret[0]) - return dxs, tensor.from_raw_tensors(ret[1]) - - def to_device(self, device): - '''Move layer state tensors onto the given device. - - Args: - device: swig converted device, created using singa.device - ''' - if self.layer is not None: - self.layer.ToDevice(device) - - def as_type(self, dtype): - pass - - def __copy__(self): - pass - - def __deepcopy__(self): - pass - - -class Conv2D(Layer): - """Construct a layer for 2D convolution. - - Args: - nb_kernels (int): num of the channels (kernels) of the input Tensor - kernel: an integer or a pair of integers for kernel height and width - stride: an integer or a pair of integers for stride height and width - border_mode (string): padding mode, case in-sensitive, - 'valid' -> padding is 0 for height and width - 'same' -> padding is half of the kernel (floor), the kernel must be - odd number. 
- cudnn_prefer (string): the preferred algorithm for cudnn convolution - which could be 'fatest', 'autotune', 'limited_workspace' and - 'no_workspace' - data_format (string): either 'NCHW' or 'NHWC' - use_bias (bool): True or False - pad: an integer or a pair of integers for padding height and width - W_specs (dict): used to specify the weight matrix specs, fields - include, - 'name' for parameter name - 'lr_mult' for learning rate multiplier - 'decay_mult' for weight decay multiplier - 'init' for init method, which could be 'gaussian', 'uniform', - 'xavier' and '' - 'std', 'mean', 'high', 'low' for corresponding init methods - TODO(wangwei) 'clamp' for gradient constraint, value is scalar - 'regularizer' for regularization, currently support 'l2' - b_specs (dict): hyper-parameters for bias vector, similar as W_specs - name (string): layer name. - input_sample_shape: 3d tuple for the shape of the input Tensor - without the batchsize, e.g., (channel, height, width) or - (height, width, channel) - """ - def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same', - cudnn_prefer='fatest', data_format='NCHW', - use_bias=True, W_specs=None, b_specs=None, - pad=None, input_sample_shape=None): - super(Conv2D, self).__init__(name) - assert data_format == 'NCHW', 'Not supported data format: %s ' \ - 'only "NCHW" is enabled currently' % (data_format) - conf = self.conf.convolution_conf - conf.num_output = nb_kernels - conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad) - conf.bias_term = use_bias - # TODO(wangwei) enable data format for cpp code - # conf.data_format = data_format - if W_specs is None: - W_specs = {'init': 'xavier'} - if b_specs is None: - b_specs = {'init': 'constant'} - if 'name' not in W_specs: - W_specs['name'] = name + '_weight' - if 'name' not in b_specs: - b_specs['name'] = name + '_bias' - wspecs = _construct_param_specs_from_dict(W_specs) - self.conf.param.extend([wspecs]) - self.param_specs.append(wspecs) - bspecs = _construct_param_specs_from_dict(b_specs) - self.conf.param.extend([bspecs]) - self.param_specs.append(bspecs) - - _check_engine(engine, ['cudnn', 'singacpp']) - self.layer = _create_layer(engine, 'Convolution') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Conv1D(Conv2D): - """Construct a layer for 1D convolution. - - Most of the args are the same as those for Conv2D except the kernel, - stride, pad, which is a scalar instead of a tuple. - input_sample_shape is a tuple with a single value for the input feature - length - """ - - def __init__(self, name, nb_kernels, kernel=3, stride=1, - border_mode='same', cudnn_prefer='fatest', - use_bias=True, W_specs={'init': 'Xavier'}, - b_specs={'init': 'Constant', 'value': 0}, pad=None, - input_sample_shape=None): - pad = None - if pad is not None: - pad = (0, pad) - if input_sample_shape is not None: - input_sample_shape = (1, 1, input_sample_shape[0]) - super(Conv1D, self).__init__(name, nb_kernels, (1, kernel), (0, stride), - border_mode, cudnn_prefer, - use_bias=use_bias, pad=pad, - W_specs=W_specs, b_specs=b_specs, - input_sample_shape=input_sample_shape) - - def get_output_sample_shape(self): - shape = self.layer.GetOutputSampleShape() - assert len(shape) == 3, 'The output sample shape should be 3D.'\ - 'But the length is %d' % len(shape) - return (shape[0], shape[2]) - - -class Pooling2D(Layer): - '''2D pooling layer providing max/avg pooling. 
- - All args are the same as those for Conv2D, except the following one - - Args: - mode: pooling type, model_pb2.PoolingConf.MAX or - model_pb2.PoolingConf.AVE - - ''' - def __init__(self, name, mode, kernel=3, stride=2, border_mode='same', - pad=None, data_format='NCHW', input_sample_shape=None): - super(Pooling2D, self).__init__(name) - assert data_format == 'NCHW', 'Not supported data format: %s ' \ - 'only "NCHW" is enabled currently' % (data_format) - conf = self.conf.pooling_conf - conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad) - conf.pool = mode - _check_engine(engine, ['cudnn', 'singacpp']) - self.layer = _create_layer(engine, 'Pooling') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class MaxPooling2D(Pooling2D): - - def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, - data_format='NCHW', input_sample_shape=None): - super(MaxPooling2D, self).__init__(name, model_pb2.PoolingConf.MAX, - kernel, stride, border_mode, - pad, data_format, input_sample_shape) - - -class AvgPooling2D(Pooling2D): - - def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, - data_format='NCHW', input_sample_shape=None): - super(AvgPooling2D, self).__init__(name, model_pb2.PoolingConf.AVE, - kernel, stride, border_mode, - pad, data_format, input_sample_shape) - - -class MaxPooling1D(MaxPooling2D): - - def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, - data_format='NCHW', input_sample_shape=None): - """Max pooling for 1D feature. - - Args: - input_sample_shape (tuple): 1D tuple for input feature length - """ - pad = None - if pad is not None: - pad = (0, pad) - if input_sample_shape is not None: - assert len(input_sample_shape) == 1, \ - 'AvgPooling1D expects input sample to be 1D' - input_sample_shape = (1, 1, input_sample_shape[0]) - else: - input_sample_shape = None - super(MaxPooling1D, self).__init__(name, (1, kernel), (0, stride), - border_mode, pad, - data_format, input_sample_shape) - - def get_output_sample_shape(self): - shape = self.layer.GetOutputSampleShape() - return (shape[2],) - - -class AvgPooling1D(AvgPooling2D): - - def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None, - data_format='NCHW', input_sample_shape=None): - """input_feature_length is a scalar value""" - pad2 = None - if pad is not None: - pad2 = (pad, 0) - if input_sample_shape is not None: - assert len(input_sample_shape) == 1, \ - 'AvgPooling1D expects input sample to be 1D' - input_sample_shape = (1, 1, input_sample_shape[0]) - else: - input_sample_shape = None - - super(AvgPooling1D, self).__init__(name, (kernel, 1), (0, stride), - border_mode, pad2, - data_format, input_sample_shape) - - def get_output_sample_shape(self): - shape = self.layer.GetOutputSampleShape() - return (shape[2],) - - -class BatchNormalization(Layer): - """Batch-normalization. - - Args: - momentum (float): for running average mean and variance. - beta_specs (dict): dictionary includes the fields for the beta - param: - 'name' for parameter name - 'lr_mult' for learning rate multiplier - 'decay_mult' for weight decay multiplier - 'init' for init method, which could be 'gaussian', 'uniform', - 'xavier' and '' - 'std', 'mean', 'high', 'low' for corresponding init methods - 'clamp' for gradient constraint, value is scalar - 'regularizer' for regularization, currently support 'l2' - gamma_specs (dict): similar to beta_specs, but for the gamma param. 
- name (string): layer name - input_sample_shape (tuple): with at least one integer - """ - def __init__(self, name, momentum=0.9, - beta_specs=None, gamma_specs=None, input_sample_shape=None): - super(BatchNormalization, self).__init__(name) - conf = self.conf.batchnorm_conf - conf.factor = momentum - if beta_specs is None: - beta_specs = {'init': 'Xavier'} - if gamma_specs is None: - gamma_specs = {'init': 'Xavier'} - if 'name' not in beta_specs: - beta_specs['name'] = name + '_beta' - if 'name' not in gamma_specs: - gamma_specs['name'] = name + '_gamma' - mean_specs = {'init': 'constant', 'value': 0, 'name': name+'_mean'} - var_specs = {'init': 'constant', 'value': 1, 'name': name+'_var'} - self.conf.param.extend([_construct_param_specs_from_dict(gamma_specs)]) - self.conf.param.extend([_construct_param_specs_from_dict(beta_specs)]) - self.conf.param.extend([_construct_param_specs_from_dict(mean_specs)]) - self.conf.param.extend([_construct_param_specs_from_dict(var_specs)]) - self.param_specs.append(_construct_param_specs_from_dict(gamma_specs)) - self.param_specs.append(_construct_param_specs_from_dict(beta_specs)) - self.param_specs.append(_construct_param_specs_from_dict(mean_specs)) - self.param_specs.append(_construct_param_specs_from_dict(var_specs)) - _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda', - 'singacl']) - self.layer = _create_layer(engine, 'BatchNorm') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class LRN(Layer): - """Local response normalization. - - Args: - size (int): # of channels to be crossed - normalization. - mode (string): 'cross_channel' - input_sample_shape (tuple): 3d tuple, (channel, height, width) - """ - - def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel', - k=1, input_sample_shape=None): - super(LRN, self).__init__(name) - conf = self.conf.lrn_conf - conf.local_size = size - conf.alpha = alpha - conf.beta = beta - conf.k = k - # TODO(wangwei) enable mode = 'within_channel' - assert mode == 'cross_channel', 'only support mode="across_channel"' - conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS - _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda', - 'singacl']) - self.layer = _create_layer(engine, 'LRN') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Dense(Layer): - """Apply linear/affine transformation, also called inner-product or - fully connected layer. - - Args: - num_output (int): output feature length. - use_bias (bool): add a bias vector or not to the transformed feature - W_specs (dict): specs for the weight matrix - 'name' for parameter name - 'lr_mult' for learning rate multiplier - 'decay_mult' for weight decay multiplier - 'init' for init method, which could be 'gaussian', 'uniform', - 'xavier' and '' - 'std', 'mean', 'high', 'low' for corresponding init methods - 'clamp' for gradient constraint, value is scalar - 'regularizer' for regularization, currently support 'l2' - b_specs (dict): specs for the bias vector, same fields as W_specs. - W_transpose (bool): if true, output=x*W.T+b; - input_sample_shape (tuple): input feature length - """ - def __init__(self, name, num_output, use_bias=True, - W_specs=None, b_specs=None, - W_transpose=False, input_sample_shape=None): - """Apply linear/affine transformation, also called inner-product or - fully connected layer. - - Args: - num_output (int): output feature length. 
- use_bias (bool): add a bias vector or not to the transformed feature - W_specs (dict): specs for the weight matrix - 'name' for parameter name - 'lr_mult' for learning rate multiplier - 'decay_mult' for weight decay multiplier - 'init' for init method, which could be 'gaussian', 'uniform', - 'xavier' and '' - 'std', 'mean', 'high', 'low' for corresponding init methods - 'clamp' for gradient constraint, value is scalar - 'regularizer' for regularization, currently support 'l2' - b_specs (dict): specs for the bias vector, same fields as W_specs. - W_transpose (bool): if true, output=x*W.T+b; - input_sample_shape (tuple): input feature length - """ - super(Dense, self).__init__(name) - conf = self.conf.dense_conf - conf.num_output = num_output - conf.bias_term = use_bias - conf.transpose = W_transpose - if W_specs is None: - W_specs = {'init': 'xavier'} - if b_specs is None: - b_specs = {'init': 'constant', 'value': 0} - if 'name' not in W_specs: - W_specs['name'] = name + '_weight' - if 'name' not in b_specs: - b_specs['name'] = name + '_bias' - wspecs = _construct_param_specs_from_dict(W_specs) - bspecs = _construct_param_specs_from_dict(b_specs) - self.conf.param.extend([wspecs, bspecs]) - self.param_specs.extend([wspecs, bspecs]) - # dense layer is transparent to engine. - if engine == 'cudnn': - self.layer = _create_layer('singacuda', 'Dense') - else: - self.layer = _create_layer(engine, 'Dense') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Dropout(Layer): - """Droput layer. - - Args: - p (float): probability for dropping out the element, i.e., set to 0 - name (string): layer name - """ - - def __init__(self, name, p=0.5, input_sample_shape=None): - super(Dropout, self).__init__(name) - conf = self.conf.dropout_conf - conf.dropout_ratio = p - # 'cudnn' works for v>=5.0 - # if engine.lower() == 'cudnn': - # engine = 'cuda' - _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda', - 'singacl']) - self.layer = _create_layer(engine, 'Dropout') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Activation(Layer): - """Activation layers. - - Args: - name (string): layer name - mode (string): 'relu', 'sigmoid', or 'tanh' - input_sample_shape (tuple): shape of a single sample - """ - def __init__(self, name, mode='relu', input_sample_shape=None): - super(Activation, self).__init__(name) - _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl']) - self.conf.type = (engine + '_' + mode).lower() - self.layer = _create_layer(engine, mode) - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Softmax(Layer): - """Apply softmax. - - Args: - axis (int): reshape the input as a matrix with the dimension - [0,axis) as the row, the [axis, -1) as the column. - input_sample_shape (tuple): shape of a single sample - """ - def __init__(self, name, axis=1, input_sample_shape=None): - super(Softmax, self).__init__(name) - # conf = self.conf.softmax_conf - # conf.axis = axis - _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacl', - 'singacuda']) - self.layer = _create_layer(engine, 'Softmax') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Flatten(Layer): - """Reshape the input tensor into a matrix. - - Args: - axis (int): reshape the input as a matrix with the dimension - [0,axis) as the row, the [axis, -1) as the column. 
- input_sample_shape (tuple): shape for a single sample - """ - def __init__(self, name, axis=1, input_sample_shape=None): - super(Flatten, self).__init__(name) - conf = self.conf.flatten_conf - conf.axis = axis - # fltten layer is transparent to engine - if engine == 'cudnn': - self.layer = _create_layer('singacuda', 'Flatten') - else: - self.layer = _create_layer(engine, 'Flatten') - if input_sample_shape is not None: - self.setup(input_sample_shape) - - -class Merge(Layer): - '''Sum all input tensors. - - Args: - input_sample_shape: sample shape of the input. The sample shape of all - inputs should be the same. - ''' - def __init__(self, name, input_sample_shape=None): - self.in_shape = input_sample_shape - self.num_input = 1 - super(Merge, self).__init__(name) - - def setup(self, in_shape): - self.in_shape = in_shape - self.has_setup = True - - def get_output_sample_shape(self): - return self.in_shape - - def forward(self, flag, inputs): - assert len(inputs) > 1, 'There must be multiple input tensors' - self.num_input = len(inputs) - output = tensor.Tensor() - output.reset_like(inputs[0]) - output.set_value(0) - for x in inputs: - output += x - return output - - def backward(self, flag, grad): - assert isinstance(grad, tensor.Tensor), 'The input must be Tensor' - return [grad], [] # * self.num_input - - -class Split(Layer): - '''Replicate the input tensor. - - Args: - num_output (int): number of output tensors to generate. - input_sample_shape: includes a single integer for the input sample - feature size. - ''' - def __init__(self, name, num_output, input_sample_shape=None): - self.num_output = num_output - self.in_shape = input_sample_shape - super(Split, self).__init__(name) - - def setup(self, in_shape): - self.in_shape = in_shape - self.has_setup = True - - def get_output_sample_shape(self): - return self.in_shape - - def forward(self, flag, input): - assert isinstance(input, tensor.Tensor), 'The input must be Tensor' - outputs = [input] * self.num_output - return outputs - - def backward(self, flag, grads): - assert len(grads) > 1, 'There must be multiple gradients' - dx = tensor.Tensor() - dx.reset_like(grads[0]) - dx.set_value(0) - for g in grads: - dx += g - return dx, [] - - -class RNN(Layer): - '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu. - - Args: - hidden_size: hidden feature size, the same for all stacks of layers. - rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru', - 'tanh' and 'relu', refer to cudnn manual for each mode. - num_stacks: num of stacks of rnn layers. It is different to the - unrolling seqence length. - input_mode: 'linear' convert the input feature x by by a linear - transformation to get a feature vector of size hidden_size; - 'skip' does nothing but requires the input feature size equals - hidden_size - bidirection: True for bidirectional RNN - param_specs: config for initializing the RNN parameters. - input_sample_shape: includes a single integer for the input sample - feature size. 
- ''' - - def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0, - num_stacks=1, input_mode='linear', bidirectional=False, - param_specs=None, input_sample_shape=None): - super(RNN, self).__init__(name) - conf = self.conf.rnn_conf - assert hidden_size > 0, 'Hidden feature size must > 0' - conf.hidden_size = hidden_size - assert rnn_mode in Set(['lstm', 'gru', 'tanh', 'relu']), \ - 'rnn mode %s is not available' % (rnn_mode) - conf.rnn_mode = rnn_mode - conf.num_stacks = num_stacks - conf.dropout = dropout - conf.input_mode = input_mode - conf.direction = 'unidirectional' - if bidirectional: - conf.direction = 'bidirectional' - # currently only has rnn layer implemented using cudnn - _check_engine(engine, ['cudnn']) - if param_specs is None: - param_specs = {'name': name + '-weight', - 'init': 'uniform', 'low': 0, 'high': 1} - self.conf.param.extend([_construct_param_specs_from_dict(param_specs)]) - self.param_specs.append(_construct_param_specs_from_dict(param_specs)) - - self.layer = singa_wrap.CudnnRNN() - if input_sample_shape is not None: - self.setup(input_sample_shape) - - def forward(self, flag, inputs): - '''Forward inputs through the RNN. - - Args: - flag, kTrain or kEval. - inputs, <x1, x2,...xn, hx, cx>, where xi is the input tensor for the - i-th position, its shape is (batch_size, input_feature_length); - the batch_size of xi must >= that of xi+1; hx is the initial - hidden state of shape (num_stacks * bidirection?2:1, batch_size, - hidden_size). cx is the initial cell state tensor of the same - shape as hy. cx is valid for only lstm. For other RNNs there is - no cx. Both hx and cx could be dummy tensors without shape and - data. - - Returns: - <y1, y2, ... yn, hy, cy>, where yi is the output tensor for the i-th - position, its shape is (batch_size, - hidden_size * bidirection?2:1). hy is the final hidden state - tensor. cx is the final cell state tensor. cx is only used for - lstm. - ''' - assert self.has_setup, 'Must call setup() before forward()' - assert len(inputs) > 1, 'The input to RNN must include at '\ - 'least one input tensor '\ - 'and one hidden state tensor (could be a dummy tensor)' - tensors = [] - for t in inputs: - assert isinstance(t, tensor.Tensor), \ - 'input must be py Tensor %s' % (type(t)) - tensors.append(t.singa_tensor) - y = self.layer.Forward(flag, tensors) - return tensor.from_raw_tensors(y) - - def backward(self, flag, grad): - '''Backward gradients through the RNN. - - Args: - flag, for future use. - grad, <dy1, dy2,...dyn, dhy, dcy>, where dyi is the gradient for the - i-th output, its shape is (batch_size, hidden_size*bidirection?2:1); - dhy is the gradient for the final hidden state, its shape is - (num_stacks * bidirection?2:1, batch_size, - hidden_size). dcy is the gradient for the final cell state. - cx is valid only for lstm. For other RNNs there is - no cx. Both dhy and dcy could be dummy tensors without shape and - data. - - Returns: - <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for - the i-th input, its shape is (batch_size, - input_feature_length). dhx is the gradient for the initial - hidden state. dcx is the gradient for the initial cell state, - which is valid only for lstm. 
- ''' - tensors = [] - for t in grad: - assert isinstance(t, tensor.Tensor), 'grad must be py Tensor' - tensors.append(t.singa_tensor) - ret = self.layer.Backward(flag, tensors) - return tensor.from_raw_tensors(ret[0]), tensor.from_raw_tensors(ret[1]) - - -class LSTM(RNN): - def __init__(self, name, hidden_size, dropout=0.0, num_stacks=1, - input_mode='linear', bidirectional=False, - param_specs=None, input_sample_shape=None): - super(LSTM, self).__init__(name, hidden_size, 'lstm', dropout, - num_stacks, input_mode, bidirectional, - param_specs, input_sample_shape) - - -class GRU(RNN): - def __init__(self, name, hidden_size, dropout=0.0, num_stacks=1, - input_mode='linear', bidirectional=False, param_specs=None, - input_sample_shape=None): - super(GRU, self).__init__(name, hidden_size, 'gru', dropout, - num_stacks, input_mode, bidirectional, - param_specs, input_sample_shape) - - -def _check_engine(engine, allowed_engines): - assert engine.lower() in Set(allowed_engines), \ - '%s is not a supported engine. Pls use one of %s' % \ - (engine, ', '.join(allowed_engines)) - - -def _create_layer(eng, layer): - ''' create singa wrap layer. - - Both arguments are case insensitive. - Args: - engine, implementation engine, either 'singa' or 'cudnn' - layer, layer type, e.g., 'convolution', 'pooling'; for activation - layers, use the specific activation mode, e.g. 'relu', 'tanh'. - ''' - layer_type = eng + '_' + layer - return singa_wrap.CreateLayer(layer_type.lower()) - - -def _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad): - """Private function called by Convolution2D and Pooling2D.""" - if isinstance(kernel, tuple): - conf.kernel_h = kernel[0] - conf.kernel_w = kernel[1] - else: - conf.kernel_h = kernel - conf.kernel_w = kernel - if isinstance(stride, tuple): - conf.stride_h = stride[0] - conf.stride_w = stride[1] - else: - conf.stride_h = stride - conf.stride_w = stride - mode = border_mode.lower() - if pad is None: - # TODO(wangwei) check the border mode - if mode == 'same': - assert conf.kernel_h % 2 == 1 and conf.kernel_w % 2 == 1, \ - 'Must use odd kernel for mode="same", kernel is (%d, %d)' % ( - conf.kernel_h, conf.kernel_w) - pad = (conf.kernel_h / 2, conf.kernel_w / 2) - elif mode == 'valid': - pad = (0, 0) - else: - assert False, ('Unsupported border_mode: %s. ' - 'Please use {"valid", "same"}' % border_mode) - assert isinstance(pad, tuple), 'pad should be a tuple' - if isinstance(pad, tuple): - conf.pad_h = pad[0] - conf.pad_w = pad[1] - else: - conf.pad_h = pad - conf.pad_w = pad - return conf - - -def _construct_param_specs_from_dict(specs): - """Conver the param specs from a dict into ParamSpec protobuf object. - - Args: - specs (dict): the fields inlcude - 'name' for parameter name - 'lr_mult' for learning rate multiplier; - 'decay_mult' for weight decay multiplier; - 'init' for init method, which could be 'gaussian', 'uniform', - 'xavier' and 'msra'; - 'std', 'mean', 'high', 'low' are used by corresponding init methods; - 'constraint' for gradient constraint, value is a float threshold for - clampping the gradient. - 'regularizer' for regularization, currently support 'l2', value is a - float for the coefficient. 
- - Returns: - a ParamSpec object - """ - conf = model_pb2.ParamSpec() - if 'name' in specs: - conf.name = specs['name'] - if 'lr_mult' in specs: - conf.lr_mult = specs['lr_mult'] - if 'decay_mult' in specs: - conf.decay_mult = specs['decay_mult'] - if 'init' in specs: - filler = conf.filler - filler.type = specs['init'].lower() - if specs['init'].lower() == 'uniform': - assert 'low' in specs and 'high' in specs, \ - 'low and high are required for "uniform" init method' - filler.min = specs['low'] - filler.max = specs['high'] - elif specs['init'].lower() == 'gaussian': - assert 'mean' in specs and 'std' in specs, \ - 'std and mean are required for "gaussian" init method' - filler.mean = specs['mean'] - filler.std = specs['std'] - elif specs['init'].lower() == 'constant' and 'value' in specs: - filler.value = specs['value'] - if 'regularizer' in specs: - conf.regularizer.coefficient = specs['regularizer'] - if 'constraint' in specs: - conf.constraint.threshold = specs['constraint'] - return conf - - -def get_layer_list(): - """ Return a list of strings which include the identifiers (tags) of all - supported layers - """ - return singa_wrap.GetRegisteredLayers() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/loss.py ---------------------------------------------------------------------- diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py deleted file mode 100644 index c88290b..0000000 --- a/src/python/singa/loss.py +++ /dev/null @@ -1,141 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ============================================================================= - -''' -Loss module includes a set of training loss implmentations. Some are converted -from C++ implementation, and the rest are implemented directly using python -Tensor. - -Example usage:: - - from singa import tensor - from singa import loss - from singa.proto import model_pb2 - - x = tensor.Tensor((3, 5)) - x.uniform(0, 1) # randomly genearte the prediction activation - y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int)) # set the truth - - f = loss.SoftmaxCrossEntropy() - l = f.forward(model_pb2.kTrain, x, y) # l is tensor with 3 loss values - g = f.backward() # g is a tensor containing all gradients of x w.r.t l -''' - - -from . import singa_wrap as singa -import tensor - - -class Loss(object): - '''Base loss class. - - Subclasses that wrap the C++ loss classes can use the inherited foward, - backward, and evaluate functions of this base class. Other subclasses need - to override these functions - ''' - - def __init__(self): - self.swig_loss = None - - def forward(self, flag, x, y): - '''Compute the loss values. - - Args: - flag (int): kTrain or kEval. 
If it is kTrain, then the backward - function must be called before calling forward again. - x (Tensor): the prediction Tensor - y (Tensor): the ground truch Tensor, x.shape[0] must = y.shape[0] - - Returns: - a tensor of floats for the loss values, one per sample - ''' - return tensor.from_raw_tensor( - self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor)) - - def backward(self): - ''' - Returns: - the grad of x w.r.t. the loss - ''' - return tensor.from_raw_tensor(self.swig_loss.Backward()) - - def evaluate(self, flag, x, y): # TODO(wangwei) remove flag - ''' - Args: - flag (int): must be kEval, to be removed - x (Tensor): the prediction Tensor - y (Tensor): the ground truth Tnesor - - Returns: - the averaged loss for all samples in x. - ''' - return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor) - - -class SoftmaxCrossEntropy(Loss): - '''This loss function is a combination of SoftMax and Cross-Entropy loss. - - It converts the inputs via SoftMax function and then - computes the cross-entropy loss against the ground truth values. - ''' - - def __init__(self): - self.swig_loss = singa.SoftmaxCrossEntropy() - - -class SquaredError(Loss): - '''This loss evaluates the squared error between the prediction and the - truth values. - - It is implemented using Python Tensor operations. - ''' - def __init__(self): - super(Loss, SquaredError).__init__() - self.err = None - - def forward(self, flag, x, y): - '''Compute the error as 0.5 * ||x-y||^2. - - Args: - flag (int): kTrain or kEval; if kTrain, then the backward must be - called before calling forward again. - x (Tensor): the prediction Tensor - y (Tensor): the truth Tensor, an integer value per sample, whose - value is [0, x.shape[1]) - - Returns: - a Tensor with one error value per sample - ''' - self.err = x - y - return 0.5 * tensor.squared(self.err) - - def backward(self): - '''Compute the gradient of x w.r.t the error. - - Returns: - x - y - ''' - return self.err - - def evaluate(self, flag, x, y): - '''Compuate the averaged error. - - Returns: - a float value as the averaged error - ''' - return tensor.sum(0.5 * tensor.squared(x - y)) / x.size() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/metric.py ---------------------------------------------------------------------- diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py deleted file mode 100644 index 3a5750d..0000000 --- a/src/python/singa/metric.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ============================================================================= -'''This module includes a set of metric classes for evaluating the model's -performance. 
The specific metric classes could be converted from C++ -implmentation or implemented directly using Python. - - -Example usage:: - - from singa import tensor - from singa import metric - - x = tensor.Tensor((3, 5)) - x.uniform(0, 1) # randomly genearte the prediction activation - x = tensor.SoftMax(x) # normalize the prediction into probabilities - y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int)) # set the truth - - f = metric.Accuracy() - acc = f.evaluate(x, y) # averaged accuracy over all 3 samples in x - -''' - -from . import singa_wrap as singa -import tensor - - -class Metric(object): - '''Base metric class. - - Subclasses that wrap the C++ loss classes can use the inherited foward, - and evaluate functions of this base class. Other subclasses need - to override these functions. Users need to feed in the **predictions** and - ground truth to get the metric values. - ''' - - def __init__(self): - self.swig_metric = None - - def forward(self, x, y): - '''Compute the metric for each sample. - - Args: - x (Tensor): predictions, one row per sample - y (Tensor): ground truth values, one row per sample - - Returns: - a tensor of floats, one per sample - ''' - return tensor.from_raw_tensor( - self.swig_metric.Forward(x.singa_tensor, y.singa_tensor)) - - def evaluate(self, x, y): - '''Compute the averaged metric over all samples. - - Args: - x (Tensor): predictions, one row per sample - y (Tensor): ground truth values, one row per sample - Returns: - a float value for the averaged metric - ''' - return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor) - - -class Accuracy(Metric): - '''Compute the top one accuracy for singel label prediction tasks. - - It calls the C++ functions to do the calculation. - ''' - def __init__(self): - self.swig_metric = singa.Accuracy() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/model.py ---------------------------------------------------------------------- diff --git a/src/python/singa/model.py b/src/python/singa/model.py deleted file mode 100644 index 38d9950..0000000 --- a/src/python/singa/model.py +++ /dev/null @@ -1,21 +0,0 @@ -#/** -# * Licensed to the Apache Software Foundation (ASF) under one -# * or more contributor license agreements. See the NOTICE file -# * distributed with this work for additional information -# * regarding copyright ownership. The ASF licenses this file -# * to you under the Apache License, Version 2.0 (the -# * "License"); you may not use this file except in compliance -# * with the License. You may obtain a copy of the License at -# * -# * http://www.apache.org/licenses/LICENSE-2.0 -# * -# * Unless required by applicable law or agreed to in writing, software -# * distributed under the License is distributed on an "AS IS" BASIS, -# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# * See the License for the specific language governing permissions and -# * limitations under the License. -# */ - -class Model(Object): - pass - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/net.py ---------------------------------------------------------------------- diff --git a/src/python/singa/net.py b/src/python/singa/net.py deleted file mode 100644 index 0026953..0000000 --- a/src/python/singa/net.py +++ /dev/null @@ -1,213 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -""" -Nerual net class for constructing the nets using layers and providing access -functions for net info, e.g., parameters. -""" - - -from .proto.model_pb2 import kTrain, kEval -import tensor -import layer -import cPickle as pickle - - -class FeedForwardNet(object): - - def __init__(self, loss=None, metric=None): - self.loss = loss - self.metric = metric - self.layers = [] - self.src_of_layer = {} - self.dst_of_layer = None - self.ordered_layers = None - - def to_device(self, dev): - for lyr in self.layers: - lyr.to_device(dev) - - def add(self, lyr, src=None): - """Append a layer into the layer list. - - This function will get the sample shape from the last layer to setup - the newly added layer. For the first layer, it is setup outside. - The calling function should ensure the correctness of the layer order. - - Args: - lyr (Layer): the layer to be added - """ - if src is not None: - if isinstance(src, layer.Layer): - assert src.has_setup is True, 'the source layer must be set up' - self.src_of_layer[lyr.name] = [src] - else: - assert type(src) == list, 'the src must be a list of layers' - self.src_of_layer[lyr.name] = src - # print 'merge------', len(src) - else: - assert len(self.layers) > 0 or lyr.has_setup, \ - 'Source layers are needed to set up this layer' - if len(self.layers) > 0: - self.src_of_layer[lyr.name] = [self.layers[-1]] - else: - self.src_of_layer[lyr.name] = [] - if lyr.has_setup is False: - # print shape - in_shape = self.src_of_layer[lyr.name][0].get_output_sample_shape() - lyr.setup(in_shape) - print lyr.name, lyr.get_output_sample_shape() - self.layers.append(lyr) - return lyr - - def param_values(self): - values = [] - layers = self.layers - if self.ordered_layers is not None: - layers = self.ordered_layers - for lyr in layers: - values.extend(lyr.param_values()) - return values - - def param_specs(self): - specs = [] - layers = self.layers - if self.ordered_layers is not None: - layers = self.ordered_layers - for lyr in layers: - specs.extend(lyr.param_specs) - return specs - - def param_names(self): - return [spec.name for spec in self.param_specs()] - - def train(self, x, y): - out = self.forward(kTrain, x) - l = self.loss.forward(kTrain, out, y) - if self.metric is not None: - m = self.metric.evaluate(out, y) - return self.backward(), (l.l1(), m) - - def evaluate(self, x, y): - """Evaluate the loss and metric of the given data""" - out = self.forward(kEval, x) - l = None - m = None - assert self.loss is not None or self.metric is not None,\ - 'Cannot do evaluation, as neither loss nor metic is set' - if self.loss is not None: - l = self.loss.evaluate(kEval, out, y) - if self.metric is not None: - m = self.metric.evaluate(out, y) - return l, m - - def predict(self, x): - xx = self.forward(kEval, x) - 
return tensor.softmax(xx) - - def topo_sort(self, cur, src_of_layer, visited=None, order=None): - if visited is None: - visited = {} - for name in src_of_layer.keys(): - visited[name] = False - order = [] - srcs = src_of_layer[cur.name] - for src in srcs: - if visited[src.name] is False: - visited[src.name] = True - self.topo_sort(src, src_of_layer, visited, order) - order.append(cur) - visited[cur.name] = True - return order - - def forward(self, flag, x): - # print x.l1() - if self.ordered_layers is None: - self.ordered_layers = self.topo_sort(self.layers[-1], - self.src_of_layer) - inputs = [x] - output_of_layer = {} - for cur in self.ordered_layers: - srcs = self.src_of_layer[cur.name] - disp_src = cur.name + '<--' - for src in srcs: - outs = output_of_layer[src.name] - if type(outs) == list: - inputs.append(outs[0]) - else: - inputs.append(outs) - disp_src += '+' + src.name - # del output_of_layer[src.name] - # print disp_src - if len(inputs) == 1: - inputs = inputs[0] - output_of_layer[cur.name] = cur.forward(flag, inputs) - inputs = [] - # print lyr.name, x.l1() - # print output_of_layer - return output_of_layer[self.ordered_layers[-1].name] - - def backward(self): - if self.dst_of_layer is None: - self.dst_of_layer = {} - for cur in self.layers: - self.dst_of_layer[cur.name] = [] - for cur in self.ordered_layers[1:]: - srcs = self.src_of_layer[cur.name] - for src in srcs: - self.dst_of_layer[src.name].append(cur) - grad = self.loss.backward() - if len(grad.shape) > 1: - grad /= grad.shape[0] # average across the batch - # print 'grad', grad.l1() - grads = [grad] - output_of_layer = {} - pgrads = [] - for cur in reversed(self.ordered_layers): - for dst in self.dst_of_layer[cur.name]: - outputs = output_of_layer[dst.name] - if type(outputs) == list: - grads.append(outputs[0]) - else: - grads.append(outputs) - # del output_of_layer[dst.name] - if len(grads) == 1: - grads = grads[0] - outs, _pgrads = cur.backward(kTrain, grads) - pgrads.append(_pgrads) - output_of_layer[cur.name] = outs - grads = [] - - ret = [] - for pgrad in reversed(pgrads): - ret.extend(pgrad) - return ret - - def save(self, f): - """Save model parameters using cpickle""" - params = {} - for (specs, val) in zip(self.param_specs(), self.param_values()): - val.to_host() - params[specs.name] = tensor.to_numpy(val) - with open(f, 'wb') as fd: - pickle.dump(params, fd) - - def load(self, f): - """Load model parameters using cpickle""" - with open(f, 'rb') as fd: - params = pickle.load(fd) - for (specs, val) in zip(self.param_specs(), self.param_values()): - val.copy_from_numpy(params[specs.name])
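
For readers following the removal of net.py above, a rough end-to-end sketch of how FeedForwardNet, the loss module and the metric module fit together may help. It assumes a Dense layer class in singa.layer (not part of this diff), fabricates a tiny random batch, and writes the parameter update with the tensor helpers that do appear above rather than with the optimizer module:

    import numpy as np
    from singa import layer, loss, metric, net, tensor

    # Two fully-connected layers; Dense and its arguments are assumed for
    # illustration, the layer module itself is not shown in this commit.
    ffn = net.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
    ffn.add(layer.Dense('fc1', 64, input_sample_shape=(100,)))
    ffn.add(layer.Dense('fc2', 10))

    # A hypothetical batch of 8 samples with 100 features and integer labels.
    tx = tensor.Tensor((8, 100))
    tx.uniform(0, 1)
    ty = tensor.from_numpy(np.array([0, 1, 2, 3, 4, 5, 6, 7], dtype=np.int))

    # train() runs forward and backward (see above) and returns the parameter
    # gradients together with the scalar loss and metric for the batch.
    grads, (lvalue, acc) = ffn.train(tx, ty)

    # Bare-bones SGD step using only calls visible in the diff (to_host,
    # to_numpy, copy_from_numpy); a real script would use singa.optimizer.
    for spec, p, g in zip(ffn.param_specs(), ffn.param_values(), grads):
        p.to_host()
        g.to_host()
        p.copy_from_numpy(tensor.to_numpy(p) - 0.01 * tensor.to_numpy(g))

    ffn.save('params.pkl')   # parameters are pickled as numpy arrays, see save()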

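The specs-to-ParamSpec helper whose tail opens this section (its signature is cut off above) takes a plain dict of keys such as 'name', 'lr_mult', 'init', 'regularizer' and 'constraint'. Written out directly against the protobuf, the ParamSpec it builds for a gaussian-initialised weight looks roughly like this; the field assignments mirror the code above, the concrete values are only illustrative:

    from singa.proto import model_pb2

    conf = model_pb2.ParamSpec()
    conf.name = 'fc1/weight'              # 'name'
    conf.lr_mult = 1.0                    # 'lr_mult'
    conf.decay_mult = 1.0                 # 'decay_mult'
    conf.filler.type = 'gaussian'         # 'init', lower-cased
    conf.filler.mean = 0.0                # 'mean', required when init is 'gaussian'
    conf.filler.std = 0.01                # 'std', required when init is 'gaussian'
    conf.regularizer.coefficient = 5e-4   # 'regularizer'
    conf.constraint.threshold = 10.0      # 'constraint'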