http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/api/singa.i
----------------------------------------------------------------------
diff --git a/src/api/singa.i b/src/api/singa.i
new file mode 100644
index 0000000..12f46f3
--- /dev/null
+++ b/src/api/singa.i
@@ -0,0 +1,31 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+/*interface file for swig */
+
+%module singa_wrap
+%include "config.i"
+%include "core_tensor.i"
+%include "core_device.i"
+%include "model_layer.i"
+%include "model_optimizer.i"
+%include "model_loss.i"
+%include "model_metric.i"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/setup.py.in
----------------------------------------------------------------------
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
deleted file mode 100644
index 881cd30..0000000
--- a/src/python/setup.py.in
+++ /dev/null
@@ -1,98 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# 
-
-# Always prefer setuptools over distutils
-from setuptools import setup
-
-
-setup(
-    name='singa',
-
-    version='${PACKAGE_VERSION}',
-
-    description='A General Deep Learning System',
-
-    url='https://github.com/apache/incubator-singa',
-
-    author='Apache SINGA (incubating)',
-    author_email='[email protected]',
-
-    license='Apache 2',
-
-    classifiers=[
-        #   3 - Alpha
-        #   4 - Beta
-        #   5 - Production/Stable
-        'Development Status :: 3 - Alpha',
-
-        'Intended Audience :: Developers',
-        'Topic :: Deep Learning System ',
-
-        'License :: Apache License',
-
-        # Specify the Python versions you support here. In particular, ensure
-        # that you indicate whether you support Python 2, Python 3 or both.
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.6',
-        'Programming Language :: Python :: 2.7',
-        ],
-
-    keywords='deep learning singa apache',
-
-    packages= ['singa', 'singa.proto'],
-
-    #py_modules=["singa"],
-
-    install_requires=[
-        'numpy>=1.11.0',
-        'protobuf>=2.5.0,<3'
-        ],
-
-    #List additional groups of dependencies here (e.g. development
-    #dependencies). You can install these using the following syntax,
-    #for example:
-    #$ pip install -e .[dev,test]
-    #extras_require={
-    #   'dev': ['check-manifest'],
-    #   'test': ['coverage'],
-    #},
-
-    #If there are data files included in your packages that need to be
-    #installed, specify them here.  If using Python 2.6 or less, then these
-    #have to be included in MANIFEST.in as well.
-
-    package_data={
-        'singa': ['_singa_wrap.so'],
-    },
-
-    #Although 'package_data' is the preferred approach, in some case you may
-    #need to place data files outside of your packages. See:
-    #http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files  # noqa
-    #In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
-    #data_files=[('my_data', ['data/data_file'])],
-
-    #To provide executable scripts, use entry points in preference to the
-    #"scripts" keyword. Entry points provide cross-platform support and allow
-    #pip to create the appropriate form of executable for the target platform.
-
-    entry_points={
-        'console_scripts': [
-            'singa=singa.command:main',
-        ],
-    },
-)
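The entry_points block in the deleted setup script maps the singa console command to singa.command:main. A rough, hedged equivalent of the script that setuptools would generate (for illustration only):

    # approximate behaviour of the 'singa' console script created by setuptools
    import sys
    from singa import command

    if __name__ == '__main__':
        sys.exit(command.main())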

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
deleted file mode 100644
index c81c6ef..0000000
--- a/src/python/singa/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# 
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/command.py
----------------------------------------------------------------------
diff --git a/src/python/singa/command.py b/src/python/singa/command.py
deleted file mode 100644
index f14c8c5..0000000
--- a/src/python/singa/command.py
+++ /dev/null
@@ -1,240 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-This script is the main entry point for users to run singa inside a model workspace
-
-To use this script, users should install these dependencies: flask, pillow and protobuf
-'''
-
-import sys, glob, os, random, shutil, time
-from flask import Flask, request, redirect, url_for
-import numpy as np
-import ConfigParser
-import urllib, traceback
-
-
-from argparse import ArgumentParser
-from argparse import RawDescriptionHelpFormatter
-sys.path.append(os.getcwd())
-
-__all__ = []
-__version__ = 0.1
-__date__ = '2016-07-20'
-__updated__ = '2016-07-20'
-__shortdesc__ = '''
-welcome to singa
-'''
-
-app = Flask(__name__)
-config = ConfigParser.RawConfigParser()
-service = {}
-data_path = "data_"
-parameter_path = "parameter_"
-
-debug = False
-
-class CLIError(Exception):
-    '''Generic exception to raise and log different fatal errors.'''
-    def __init__(self, msg):
-        super(CLIError).__init__(type(self))
-        self.msg = "E: %s" % msg
-    def __str__(self):
-        return self.msg
-    def __unicode__(self):
-        return self.msg
-
-def main(argv=None): # IGNORE:C0111
-    '''Command line options.'''
-
-    from . import device
-
-    if argv is None:
-        argv = sys.argv
-    else:
-        sys.argv.extend(argv)
-
-    program_name = os.path.basename(sys.argv[0])
-    program_version = "v%s" % __version__
-    program_build_date = str(__updated__)
-    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
-    program_shortdesc = __shortdesc__
-    program_license = '''%s
-
-  Created by dbsystem group on %s.
-  Copyright 2016 NUS School of Computing. All rights reserved.
-
-  Licensed under the Apache License 2.0
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Distributed on an "AS IS" basis without warranties
-  or conditions of any kind, either express or implied.
-
-USAGE
-''' % (program_shortdesc, str(__date__))
-
-    global debug
-
-    try:
-        # Setup argument parser
-        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
-        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
-        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
-        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
-        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
-        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
-        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
-        parser.add_argument('-V', '--version', action='version', version=program_version_message)
-
-        # Process arguments
-        args = parser.parse_args()
-
-        port = args.port
-        parameter_file = args.parameter
-        mode = args.mode
-        need_reload = args.reload_data
-        use_cpu = args.use_cpu
-        debug = args.debug
-
-        #prepare data files
-        config.read('file.cfg')
-        file_prepare(need_reload)
-
-
-        import network as net
-        model = net.create()
-
-        #load parameter
-        parameter_file=get_parameter(parameter_file)
-
-        if parameter_file:
-            print "load parameter file: %s" % parameter_file
-            model.load(parameter_file)
-
-        if use_cpu:
-            raise CLIError("Currently cpu is not support!")
-        else:
-            print "runing with gpu"
-            d = device.create_cuda_gpu()
-
-        model.to_device(d)
-
-        if mode == "serve":
-            print "runing singa in serve mode, listen to  port: %s " % port
-            global service
-            from serve import Service
-            service =Service(model,d)
-
-            app.debug = debug
-            app.run(host='0.0.0.0', port= port)
-        elif mode == "train":
-            print "runing singa in train mode"
-            global trainer
-            from train import Trainer
-            trainer= Trainer(model,d)
-            if not parameter_file:
-                trainer.initialize()
-            trainer.train()
-        else:
-            raise CLIError("Currently only serve mode is surpported!")
-        return 0
-    except KeyboardInterrupt:
-        ### handle keyboard interrupt ###
-        return 0
-    except Exception, e:
-        if debug:
-            traceback.print_exc()
-            raise(e)
-        indent = len(program_name) * " "
-        sys.stderr.write(program_name + ": " + str(e) + "\n")
-        sys.stderr.write(indent + "  for help use --help \n\n")
-        return 2
-
-def file_prepare(reload_data=False):
-    '''
-        download all files and generate data.py
-    '''
-    if not reload_data and os.path.exists("data_.py"):
-        return
-
-    print "download file"
-    #clean data
-    shutil.rmtree("data_.py",ignore_errors=True)
-    shutil.rmtree("data_",ignore_errors=True)
-
-    data_py=open("data_.py",'w')
-    data_py.write("#%s" % "This file is Generated by SINGA, please don't 
edit\n\n")
-    if config.has_section("data"):
-        file_list = config.items("data")
-        #download files
-        for f in file_list:
-            name,path=download_file(f[0],f[1],data_path)
-            data_py.write("%s=\"%s\"\n" % (name,path))
-
-    data_py.flush()
-    data_py.close()
-
-    if config.has_section("parameter"):
-        parameter_list = config.items("parameter")
-        for p in parameter_list:
-            download_file(p[0],p[1],parameter_path)
-
-def download_file(name,path,dest):
-    '''
-    download one file to dest
-    '''
-    if not os.path.exists(dest):
-        os.makedirs(dest)
-    if (path.startswith('http')):
-        file_name = path.split('/')[-1]
-        target = os.path.join(dest,file_name)
-        urllib.urlretrieve(path,target)
-    return name,target
-
-
-def get_parameter(file_name=None):
-    '''
-    get the particular file name or get the last parameter file
-    '''
-    if not os.path.exists(parameter_path):
-        os.makedirs(parameter_path)
-        return
-
-    if file_name:
-       return os.path.join(parameter_path,file_name)
-
-    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
-    if len(parameter_list)==0:
-        return
-    parameter_list.sort()
-
-    return parameter_list[-1]
-
[email protected]("/")
-def index():
-    return "Hello SINGA User!"
-
[email protected]('/predict', methods=['POST'])
-def predict():
-    if request.method == 'POST':
-        try:
-            response=service.serve(request)
-        except Exception as e:
-            return e
-        return response
-    return "error, should be post request"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
deleted file mode 100644
index 2d93823..0000000
--- a/src/python/singa/device.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-'''
-This script includes Device class and its subclasses for python users
-to call singa::Device and its methods.
-
-TODO(wangwei) implement py CudaGPU class.
-'''
-
-from . import singa_wrap as singa
-
-
-class Device(object):
-    """ Class and member functions for singa::Device.
-
-    Create Device instances using the CreateXXXDevice.
-    """
-
-    def __init__(self, id, device):
-        """Device constructor given device ID.
-
-        Args:
-            id (int): device ID.
-            device: swig shared_ptr<Device>
-        """
-        self.id = id
-        self.singa_device = device
-
-    def set_rand_seed(self, seed):
-        self.singa_device.SetRandSeed(seed)
-
-    def get_host(self):
-        return self.singa_device.host()
-
-    def get_id(self):
-        return self.singa_device.id()
-
-
-def get_num_gpus():
-    return singa.Platform.GetNumGPUs()
-
-
-def get_gpu_ids():
-    return singa.Platform.GetGPUIDs()
-
-
-def get_gpu_mem_size(id):
-    return singa.Platform.GetGPUMemSize(id)
-
-
-def device_query(id, verbose=False):
-    return singa.Platform.DeviceQuery(id, verbose)
-
-
-def create_cuda_gpus(num):
-    '''Create a list of CudaGPU devices.
-
-    Args:
-        num (int): number of device to create.
-    Returns:
-        a list of swig converted CudaGPU devices.
-    '''
-
-    return singa.Platform.CreateCudaGPUs(num)
-
-
-def create_cuda_gpu():
-    '''Create a single CudaGPU device.
-
-    Returns:
-        a swig converted CudaGPU device.
-    '''
-
-    return singa.Platform.CreateCudaGPUs(1)[0]
-
-
-def create_cuda_gpus_on(device_ids):
-    '''Create a list of CudaGPU devices.
-
-    Args:
-        device_ids (list): a list of GPU card IDs.
-
-    Returns:
-        a list of swig converted CudaGPU devices.
-    '''
-    return singa.Platform.CreateCudaGPUsOn(device_ids)
-
-
-def create_cuda_gpu_on(device_id):
-    '''Create a CudaGPU device on the given device ID.
-
-    Args:
-        device_id (int): GPU card ID.
-
-    Returns:
-        a swig converted CudaGPU device.
-    '''
-    devices = create_cuda_gpus_on([device_id])
-    return devices[0]
-
-
-default_device = singa.Platform.GetDefaultDevice()
-
-
-def get_default_device():
-    '''Get the default host device which is a CppCPU device'''
-    return default_device
-
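A short usage sketch of the deleted device helpers, assuming a CUDA-enabled build of SINGA (illustrative only):

    from singa import device

    # prefer a CudaGPU device when one is visible, else fall back to the default CppCPU
    if device.get_num_gpus() > 0:
        dev = device.create_cuda_gpu_on(0)
        device.device_query(0, verbose=True)
    else:
        dev = device.get_default_device()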

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
deleted file mode 100644
index fb99663..0000000
--- a/src/python/singa/initializer.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-'''Popular initialization methods for parameter values (Tensor objects).
-
-Example usages::
-
-    from singa import tensor
-    from singa import initializer
-
-    x = tensor.Tensor((3, 5))
-    initializer.uniform(x, 3, 5) # use both fan_in and fan_out
-    initializer.uniform(x, 3, 0)  # use only fan_in
-'''
-
-import math
-
-
-def uniform(t, fan_in=0, fan_out=0):
-    '''Initialize the values of the input tensor following a uniform
-    distribution with specific bounds.
-
-    Args:
-        fan_in(int): for the weight Tensor of a convolution layer,
-            fan_in = nb_channel * kh * kw; for dense layer,
-            fan_in = input_feature_length
-        fan_out(int): for the convolution layer weight Tensor,
-            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
-            layer, fan_out = output_feature_length
-
-    Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
-    training deep feedforward neural networks.
-
-    '''
-    assert fan_in > 0 or fan_out > 0, \
-        'fan_in and fan_out cannot be 0 at the same time'
-    avg = 2
-    if fan_in * fan_out == 0:
-        avg = 1
-    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
-    t.uniform(-x, x)
-
-
-def gaussian(t, fan_in=0, fan_out=0):
-    '''Initialize the values of the input tensor following a Gaussian
-    distribution with specific std.
-
-    Args:
-        fan_in(int): for the weight Tensor of a convolution layer,
-            fan_in = nb_channel * kh * kw; for dense layer,
-            fan_in = input_feature_length
-        fan_out(int): for the convolution layer weight Tensor,
-            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
-            layer, fan_out = output_feature_length
-
-    Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
-    Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
-    '''
-    assert fan_in > 0 or fan_out > 0, \
-        'fan_in and fan_out cannot be 0 at the same time'
-    avg = 2
-    if fan_in * fan_out == 0:
-        avg = 1
-    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
-    t.gaussian(0, std)
-
-
-def xavier(t):
-    '''Initialize the matrix parameter follow a Uniform distribution from
-    [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
-
-    Deprecated. Please use uniform()
-
-    Args:
-        t (Tensor): the parameter tensor
-    '''
-
-    scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1]))
-    t.uniform(-scale, scale)
-
-
-def glorot(t):
-    '''Initialize the matrix parameter follow a Gaussian distribution with
-    mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
-
-    Deprecated. Please use gaussian()
-
-    Args:
-        t (Tensor): the parater tensor
-    '''
-    scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1]))
-    t.gaussian(0, 1)
-    t *= scale
-
-
-def msra(t):
-    '''Initialize the matrix parameter follow a Gaussian distribution with
-    mean = 0, std = math.sqrt(2.0 / nb_row).
-
-    Deprecated. Please use gaussian()
-
-    Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
-    nonlinearities.
-
-    Args:
-        t (Tensor): the parameter tensor
-    '''
-    t.gaussian(0, math.sqrt(2.0 / t.shape[0]))
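To complement the uniform() example in the module docstring above, a hedged sketch of Gaussian initialization for a dense layer's parameters (the shapes below are illustrative):

    from singa import tensor
    from singa import initializer

    w = tensor.Tensor((256, 128))
    initializer.gaussian(w, 256, 128)  # use both fan_in and fan_out
    b = tensor.Tensor((1, 128))
    b.set_value(0.0)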

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
deleted file mode 100644
index f22b3d1..0000000
--- a/src/python/singa/layer.py
+++ /dev/null
@@ -1,933 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-""" Python layers wrap the C++ layers to provide simpler construction APIs.
-
-Example usages::
-
-    from singa import layer
-    from singa import tensor
-    from singa import device
-    from singa.model_pb2 import kTrain
-
-    layer.engine = 'cudnn'  # to use cudnn layers
-    dev = device.create_cuda_gpu()
-
-    # create a convolution layer
-    conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
-    conv.to_device(dev)  # move the layer data onto a CudaGPU device
-    x = tensor.Tensor((3, 32, 32), dev)
-    x.uniform(-1, 1)
-    y = conv.forward(kTrain, x)
-
-    dy = tensor.Tensor()
-    dy.reset_like(y)
-    dy.set_value(0.1)
-    # dp is a list of tensors for parameter gradients
-    dx, dp = conv.backward(kTrain, dy)
-"""
-
-from sets import Set
-from . import singa_wrap
-from .proto import model_pb2
-import tensor
-
-
-engine = 'cudnn'
-'''engine is the prefix of layer identifier.
-
-The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for
-layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively.
-For example, CudnnConvolution layer is identified by 'cudnn_convolution';
-'singacpp_convolution' is for Convolution layer;
-Some layers' implementations use only Tensor functions, therefore they are
-transparent to the underlying devices. For these layers, they would have
-multiple identifiers, e.g., singacpp_dropout, singacuda_dropout and
-singacl_dropout are all for the Dropout layer. In addition, it has an extra
-identifier 'singa', i.e. 'singa_dropout' also stands for the Dropout layer.
-
-engine is case insensitive. Each python layer would create the correct specific
-layer using the engine attribute.
-'''
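As a concrete illustration of the engine prefix described above (a sketch, not part of this diff), switching to the plain C++ implementations only requires changing the module-level engine before constructing layers:

    from singa import layer

    layer.engine = 'singacpp'  # resolves to identifiers such as 'singacpp_convolution'
    conv = layer.Conv2D('conv1', 32, 3, 1, input_sample_shape=(3, 32, 32))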
-
-
-class Layer(object):
-    '''Base Python layer class.
-
-    Typically, the life cycle of a layer instance includes:
-        1. construct layer without input_sample_shapes, goto 2;
-           construct layer with input_sample_shapes, goto 3;
-        2. call setup to create the parameters and setup other meta fields
-        3. call forward or access layer members
-        4. call backward and get parameters for update
-
-    Args:
-        name (str): layer name
-    '''
-
-    def __init__(self, name, **kwargs):
-        self.layer = None  # layer converted by swig
-        self.name = name  # TODO(wangwei) duplicate with self.conf.name
-        self.conf = model_pb2.LayerConf()
-        self.conf.name = name
-        self.param_specs = []
-        self.has_setup = False
-
-    def param_names(self):
-        '''
-        Returns:
-            a list of strings, one for the name of one parameter Tensor
-        '''
-        names = []
-        for x in self.param_specs:
-            names.append(x['name'])
-        return names
-
-    def setup(self, in_shapes):
-        '''Call the C++ setup function to create params and set some meta data.
-
-        Args:
-            in_shapes: if the layer accepts a single input Tensor, in_shapes is
-                a single tuple specifying the input Tensor shape; if the layer
-                accepts multiple input Tensor (e.g., the concatenation layer),
-                in_shapes is a tuple of tuples, each for one input Tensor
-        '''
-        if self.has_setup:
-            return
-        self.layer.Setup(list(in_shapes),
-                         self.conf.SerializeToString())
-        self.has_setup = True
-
-    def get_output_sample_shape(self):
-        '''Called after setup to get the shape of the output sample(s).
-
-        Returns:
-            a tuple for a single output Tensor or a list of tuples if this layer
-            has multiple outputs
-        '''
-        assert self.has_setup, \
-            'Must call setup() before get_output_sample_shape()'
-        return self.layer.GetOutputSampleShape()
-
-    def param_values(self):
-        '''Return param value tensors.
-
-        Parameter tensors are not stored as layer members because cpp Tensor
-        could be moved onto diff devices due to the change of layer device,
-        which would result in inconsistency.
-
-        Returns:
-            a list of tensors, one for each parameter
-        '''
-        if self.layer is None:
-            return []
-        else:
-            return tensor.from_raw_tensors(self.layer.param_values())
-
-    def forward(self, flag, x):
-        '''Forward propagate through this layer.
-
-        Args:
-            flag (int): kTrain or kEval
-            x (Tensor or list<Tensor>): an input tensor if the layer is
-                connected from a single layer; a list of tensors if the layer
-                is connected from multiple layers.
-
-        Return:
-            a tensor if the layer is connected to a single layer; a list of
-            tensors if the layer is connected to multiple layers;
-        '''
-        assert self.has_setup, 'Must call setup() before forward()'
-        if type(x) == list:
-            xs = []
-            for t in x:
-                xs.append(t.singa_tensor)
-        else:
-            assert isinstance(x, tensor.Tensor), \
-                'input must be a Tensor or a list of Tensor'
-            xs = x.singa_tensor
-        y = self.layer.Forward(flag, xs)
-        if type(y) == list:
-            return tensor.from_raw_tensors(y)
-        else:
-            return tensor.from_raw_tensor(y)
-
-    def backward(self, flag, dy):
-        '''Backward propagate gradients through this layer.
-
-        Args:
-            flag (int): for future use.
-            dy (Tensor or list<Tensor>): the gradient tensor(s) y w.r.t the
-                objective loss
-        Return:
-            <dx, <dp1, dp2..>>, dx is a (set of) tensor(s) for the gradient of x
-            , dpi is the gradient of the i-th parameter
-        '''
-        if type(dy) == list:
-            dys = []
-            for t in dy:
-                dys.append(t.singa_tensor)
-        else:
-            assert isinstance(dy, tensor.Tensor), \
-                'the input must be a Tensor or a set of Tensor'
-            dys = dy.singa_tensor
-        ret = self.layer.Backward(flag, dys)
-        if type(ret[0]) == list:
-            dxs = tensor.from_raw_tensors(ret[0])
-        else:
-            dxs = tensor.from_raw_tensor(ret[0])
-        return dxs, tensor.from_raw_tensors(ret[1])
-
-    def to_device(self, device):
-        '''Move layer state tensors onto the given device.
-
-        Args:
-            device: swig converted device, created using singa.device
-        '''
-        if self.layer is not None:
-            self.layer.ToDevice(device)
-
-    def as_type(self, dtype):
-        pass
-
-    def __copy__(self):
-        pass
-
-    def __deepcopy__(self):
-        pass
-
-
-class Conv2D(Layer):
-    """Construct a layer for 2D convolution.
-
-    Args:
-        nb_kernels (int): num of kernels (i.e., channels of the output Tensor)
-        kernel: an integer or a pair of integers for kernel height and width
-        stride: an integer or a pair of integers for stride height and width
-        border_mode (string): padding mode, case in-sensitive,
-            'valid' -> padding is 0 for height and width
-            'same' -> padding is half of the kernel (floor), the kernel must be
-            odd number.
-        cudnn_prefer (string): the preferred algorithm for cudnn convolution
-            which could be 'fatest', 'autotune', 'limited_workspace' and
-            'no_workspace'
-        data_format (string): either 'NCHW' or 'NHWC'
-        use_bias (bool): True or False
-        pad: an integer or a pair of integers for padding height and width
-        W_specs (dict): used to specify the weight matrix specs, fields
-            include,
-            'name' for parameter name
-            'lr_mult' for learning rate multiplier
-            'decay_mult' for weight decay multiplier
-            'init' for init method, which could be 'gaussian', 'uniform',
-            'xavier' and ''
-            'std', 'mean', 'high', 'low' for corresponding init methods
-            TODO(wangwei) 'clamp' for gradient constraint, value is scalar
-            'regularizer' for regularization, currently support 'l2'
-        b_specs (dict): hyper-parameters for bias vector, similar as W_specs
-        name (string): layer name.
-        input_sample_shape: 3d tuple for the shape of the input Tensor
-            without the batchsize, e.g., (channel, height, width) or
-            (height, width, channel)
-    """
-    def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
-                 cudnn_prefer='fatest', data_format='NCHW',
-                 use_bias=True, W_specs=None, b_specs=None,
-                 pad=None, input_sample_shape=None):
-        super(Conv2D, self).__init__(name)
-        assert data_format == 'NCHW', 'Not supported data format: %s ' \
-            'only "NCHW" is enabled currently' % (data_format)
-        conf = self.conf.convolution_conf
-        conf.num_output = nb_kernels
-        conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad)
-        conf.bias_term = use_bias
-        # TODO(wangwei) enable data format for cpp code
-        # conf.data_format = data_format
-        if W_specs is None:
-            W_specs = {'init': 'xavier'}
-        if b_specs is None:
-            b_specs = {'init': 'constant'}
-        if 'name' not in W_specs:
-            W_specs['name'] = name + '_weight'
-        if 'name' not in b_specs:
-            b_specs['name'] = name + '_bias'
-        wspecs = _construct_param_specs_from_dict(W_specs)
-        self.conf.param.extend([wspecs])
-        self.param_specs.append(wspecs)
-        bspecs = _construct_param_specs_from_dict(b_specs)
-        self.conf.param.extend([bspecs])
-        self.param_specs.append(bspecs)
-
-        _check_engine(engine, ['cudnn', 'singacpp'])
-        self.layer = _create_layer(engine, 'Convolution')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Conv1D(Conv2D):
-    """Construct a layer for 1D convolution.
-
-    Most of the args are the same as those for Conv2D except the kernel,
-    stride, pad, which is a scalar instead of a tuple.
-    input_sample_shape is a tuple with a single value for the input feature
-    length
-    """
-
-    def __init__(self, name, nb_kernels, kernel=3, stride=1,
-                 border_mode='same', cudnn_prefer='fatest',
-                 use_bias=True, W_specs={'init': 'Xavier'},
-                 b_specs={'init': 'Constant', 'value': 0}, pad=None,
-                 input_sample_shape=None):
-        pad = None
-        if pad is not None:
-            pad = (0, pad)
-        if input_sample_shape is not None:
-            input_sample_shape = (1, 1, input_sample_shape[0])
-        super(Conv1D, self).__init__(name, nb_kernels, (1, kernel), (0, stride),
-                                     border_mode, cudnn_prefer,
-                                     use_bias=use_bias, pad=pad,
-                                     W_specs=W_specs, b_specs=b_specs,
-                                     input_sample_shape=input_sample_shape)
-
-    def get_output_sample_shape(self):
-        shape = self.layer.GetOutputSampleShape()
-        assert len(shape) == 3, 'The output sample shape should be 3D.'\
-            'But the length is %d' % len(shape)
-        return (shape[0], shape[2])
-
-
-class Pooling2D(Layer):
-    '''2D pooling layer providing max/avg pooling.
-
-    All args are the same as those for Conv2D, except the following one
-
-    Args:
-        mode: pooling type, model_pb2.PoolingConf.MAX or
-            model_pb2.PoolingConf.AVE
-
-    '''
-    def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
-                 pad=None, data_format='NCHW', input_sample_shape=None):
-        super(Pooling2D, self).__init__(name)
-        assert data_format == 'NCHW', 'Not supported data format: %s ' \
-            'only "NCHW" is enabled currently' % (data_format)
-        conf = self.conf.pooling_conf
-        conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad)
-        conf.pool = mode
-        _check_engine(engine, ['cudnn', 'singacpp'])
-        self.layer = _create_layer(engine, 'Pooling')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class MaxPooling2D(Pooling2D):
-
-    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', input_sample_shape=None):
-        super(MaxPooling2D, self).__init__(name, model_pb2.PoolingConf.MAX,
-                                           kernel, stride, border_mode,
-                                           pad, data_format, input_sample_shape)
-
-
-class AvgPooling2D(Pooling2D):
-
-    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', input_sample_shape=None):
-        super(AvgPooling2D, self).__init__(name, model_pb2.PoolingConf.AVE,
-                                           kernel, stride, border_mode,
-                                           pad, data_format, input_sample_shape)
-
-
-class MaxPooling1D(MaxPooling2D):
-
-    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', input_sample_shape=None):
-        """Max pooling for 1D feature.
-
-        Args:
-            input_sample_shape (tuple): 1D tuple for input feature length
-        """
-        pad = None
-        if pad is not None:
-            pad = (0, pad)
-        if input_sample_shape is not None:
-            assert len(input_sample_shape) == 1, \
-                'MaxPooling1D expects input sample to be 1D'
-            input_sample_shape = (1, 1, input_sample_shape[0])
-        else:
-            input_sample_shape = None
-        super(MaxPooling1D, self).__init__(name, (1, kernel), (0, stride),
-                                           border_mode, pad,
-                                           data_format, input_sample_shape)
-
-    def get_output_sample_shape(self):
-        shape = self.layer.GetOutputSampleShape()
-        return (shape[2],)
-
-
-class AvgPooling1D(AvgPooling2D):
-
-    def __init__(self, name, kernel=3, stride=2, border_mode='same', pad=None,
-                 data_format='NCHW', input_sample_shape=None):
-        """input_feature_length is a scalar value"""
-        pad2 = None
-        if pad is not None:
-            pad2 = (pad, 0)
-        if input_sample_shape is not None:
-            assert len(input_sample_shape) == 1, \
-                'AvgPooling1D expects input sample to be 1D'
-            input_sample_shape = (1, 1, input_sample_shape[0])
-        else:
-            input_sample_shape = None
-
-        super(AvgPooling1D, self).__init__(name, (kernel, 1), (0, stride),
-                                           border_mode, pad2,
-                                           data_format, input_sample_shape)
-
-    def get_output_sample_shape(self):
-        shape = self.layer.GetOutputSampleShape()
-        return (shape[2],)
-
-
-class BatchNormalization(Layer):
-    """Batch-normalization.
-
-    Args:
-        momentum (float): for running average mean and variance.
-        beta_specs (dict): dictionary includes the fields for the beta
-            param:
-            'name' for parameter name
-            'lr_mult' for learning rate multiplier
-            'decay_mult' for weight decay multiplier
-            'init' for init method, which could be 'gaussian', 'uniform',
-            'xavier' and ''
-            'std', 'mean', 'high', 'low' for corresponding init methods
-            'clamp' for gradient constraint, value is scalar
-            'regularizer' for regularization, currently support 'l2'
-        gamma_specs (dict): similar to beta_specs, but for the gamma param.
-        name (string): layer name
-        input_sample_shape (tuple): with at least one integer
-    """
-    def __init__(self, name, momentum=0.9,
-                 beta_specs=None, gamma_specs=None, input_sample_shape=None):
-        super(BatchNormalization, self).__init__(name)
-        conf = self.conf.batchnorm_conf
-        conf.factor = momentum
-        if beta_specs is None:
-            beta_specs = {'init': 'Xavier'}
-        if gamma_specs is None:
-            gamma_specs = {'init': 'Xavier'}
-        if 'name' not in beta_specs:
-            beta_specs['name'] = name + '_beta'
-        if 'name' not in gamma_specs:
-            gamma_specs['name'] = name + '_gamma'
-        mean_specs = {'init': 'constant', 'value': 0, 'name': name+'_mean'}
-        var_specs = {'init': 'constant', 'value': 1, 'name': name+'_var'}
-        self.conf.param.extend([_construct_param_specs_from_dict(gamma_specs)])
-        self.conf.param.extend([_construct_param_specs_from_dict(beta_specs)])
-        self.conf.param.extend([_construct_param_specs_from_dict(mean_specs)])
-        self.conf.param.extend([_construct_param_specs_from_dict(var_specs)])
-        self.param_specs.append(_construct_param_specs_from_dict(gamma_specs))
-        self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
-        self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
-        self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
-                               'singacl'])
-        self.layer = _create_layer(engine, 'BatchNorm')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class LRN(Layer):
-    """Local response normalization.
-
-    Args:
-        size (int): # of channels involved in the cross-channel
-            normalization.
-        mode (string): 'cross_channel'
-        input_sample_shape (tuple): 3d tuple, (channel, height, width)
-    """
-
-    def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
-                 k=1, input_sample_shape=None):
-        super(LRN, self).__init__(name)
-        conf = self.conf.lrn_conf
-        conf.local_size = size
-        conf.alpha = alpha
-        conf.beta = beta
-        conf.k = k
-        # TODO(wangwei) enable mode = 'within_channel'
-        assert mode == 'cross_channel', 'only support mode="cross_channel"'
-        conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
-                               'singacl'])
-        self.layer = _create_layer(engine, 'LRN')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Dense(Layer):
-    """Apply linear/affine transformation, also called inner-product or
-    fully connected layer.
-
-    Args:
-        num_output (int): output feature length.
-        use_bias (bool): add a bias vector or not to the transformed feature
-        W_specs (dict): specs for the weight matrix
-            'name' for parameter name
-            'lr_mult' for learning rate multiplier
-            'decay_mult' for weight decay multiplier
-            'init' for init method, which could be 'gaussian', 'uniform',
-            'xavier' and ''
-            'std', 'mean', 'high', 'low' for corresponding init methods
-            'clamp' for gradient constraint, value is scalar
-            'regularizer' for regularization, currently support 'l2'
-        b_specs (dict): specs for the bias vector, same fields as W_specs.
-        W_transpose (bool): if true, output=x*W.T+b;
-        input_sample_shape (tuple): input feature length
-    """
-    def __init__(self, name, num_output, use_bias=True,
-                 W_specs=None, b_specs=None,
-                 W_transpose=False, input_sample_shape=None):
-        """Apply linear/affine transformation, also called inner-product or
-        fully connected layer.
-
-        Args:
-            num_output (int): output feature length.
-            use_bias (bool): add a bias vector or not to the transformed feature
-            W_specs (dict): specs for the weight matrix
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): specs for the bias vector, same fields as W_specs.
-            W_transpose (bool): if true, output=x*W.T+b;
-            input_sample_shape (tuple): input feature length
-        """
-        super(Dense, self).__init__(name)
-        conf = self.conf.dense_conf
-        conf.num_output = num_output
-        conf.bias_term = use_bias
-        conf.transpose = W_transpose
-        if W_specs is None:
-            W_specs = {'init': 'xavier'}
-        if b_specs is None:
-            b_specs = {'init': 'constant', 'value': 0}
-        if 'name' not in W_specs:
-            W_specs['name'] = name + '_weight'
-        if 'name' not in b_specs:
-            b_specs['name'] = name + '_bias'
-        wspecs = _construct_param_specs_from_dict(W_specs)
-        bspecs = _construct_param_specs_from_dict(b_specs)
-        self.conf.param.extend([wspecs, bspecs])
-        self.param_specs.extend([wspecs, bspecs])
-        # dense layer is transparent to engine.
-        if engine == 'cudnn':
-            self.layer = _create_layer('singacuda', 'Dense')
-        else:
-            self.layer = _create_layer(engine, 'Dense')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Dropout(Layer):
-    """Droput layer.
-
-    Args:
-        p (float): probability for dropping out the element, i.e., set to 0
-        name (string): layer name
-    """
-
-    def __init__(self, name, p=0.5, input_sample_shape=None):
-        super(Dropout, self).__init__(name)
-        conf = self.conf.dropout_conf
-        conf.dropout_ratio = p
-        # 'cudnn' works for v>=5.0
-        #  if engine.lower() == 'cudnn':
-        #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
-                               'singacl'])
-        self.layer = _create_layer(engine, 'Dropout')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Activation(Layer):
-    """Activation layers.
-
-    Args:
-        name (string): layer name
-        mode (string): 'relu', 'sigmoid', or 'tanh'
-        input_sample_shape (tuple): shape of a single sample
-    """
-    def __init__(self, name, mode='relu', input_sample_shape=None):
-        super(Activation, self).__init__(name)
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
-        self.conf.type = (engine + '_' + mode).lower()
-        self.layer = _create_layer(engine, mode)
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Softmax(Layer):
-    """Apply softmax.
-
-    Args:
-        axis (int): reshape the input as a matrix with the dimension
-            [0,axis) as the row, the [axis, -1) as the column.
-        input_sample_shape (tuple): shape of a single sample
-    """
-    def __init__(self, name, axis=1, input_sample_shape=None):
-        super(Softmax, self).__init__(name)
-        # conf = self.conf.softmax_conf
-        # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacl',
-                               'singacuda'])
-        self.layer = _create_layer(engine, 'Softmax')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Flatten(Layer):
-    """Reshape the input tensor into a matrix.
-
-    Args:
-        axis (int): reshape the input as a matrix with the dimension
-            [0,axis) as the row, the [axis, -1) as the column.
-        input_sample_shape (tuple): shape for a single sample
-    """
-    def __init__(self, name, axis=1, input_sample_shape=None):
-        super(Flatten, self).__init__(name)
-        conf = self.conf.flatten_conf
-        conf.axis = axis
-        # flatten layer is transparent to engine
-        if engine == 'cudnn':
-            self.layer = _create_layer('singacuda', 'Flatten')
-        else:
-            self.layer = _create_layer(engine, 'Flatten')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-
-class Merge(Layer):
-    '''Sum all input tensors.
-
-    Args:
-        input_sample_shape: sample shape of the input. The sample shape of all
-            inputs should be the same.
-    '''
-    def __init__(self, name, input_sample_shape=None):
-        self.in_shape = input_sample_shape
-        self.num_input = 1
-        super(Merge, self).__init__(name)
-
-    def setup(self, in_shape):
-        self.in_shape = in_shape
-        self.has_setup = True
-
-    def get_output_sample_shape(self):
-        return self.in_shape
-
-    def forward(self, flag, inputs):
-        assert len(inputs) > 1, 'There must be multiple input tensors'
-        self.num_input = len(inputs)
-        output = tensor.Tensor()
-        output.reset_like(inputs[0])
-        output.set_value(0)
-        for x in inputs:
-            output += x
-        return output
-
-    def backward(self, flag, grad):
-        assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'
-        return [grad], []  # * self.num_input
-
-
-class Split(Layer):
-    '''Replicate the input tensor.
-
-    Args:
-        num_output (int): number of output tensors to generate.
-        input_sample_shape: includes a single integer for the input sample
-            feature size.
-    '''
-    def __init__(self, name, num_output, input_sample_shape=None):
-        self.num_output = num_output
-        self.in_shape = input_sample_shape
-        super(Split, self).__init__(name)
-
-    def setup(self, in_shape):
-        self.in_shape = in_shape
-        self.has_setup = True
-
-    def get_output_sample_shape(self):
-        return self.in_shape
-
-    def forward(self, flag, input):
-        assert isinstance(input, tensor.Tensor), 'The input must be Tensor'
-        outputs = [input] * self.num_output
-        return outputs
-
-    def backward(self, flag, grads):
-        assert len(grads) > 1, 'There must be multiple gradients'
-        dx = tensor.Tensor()
-        dx.reset_like(grads[0])
-        dx.set_value(0)
-        for g in grads:
-            dx += g
-        return dx, []
-
-
-class RNN(Layer):
-    '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
-
-    Args:
-        hidden_size: hidden feature size, the same for all stacks of layers.
-        rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru',
-            'tanh' and 'relu', refer to cudnn manual for each mode.
-        num_stacks: num of stacks of rnn layers. It is different to the
-            unrolling sequence length.
-        input_mode: 'linear' converts the input feature x by a linear
-            transformation to get a feature vector of size hidden_size;
-            'skip' does nothing but requires the input feature size equals
-            hidden_size
-        bidirection: True for bidirectional RNN
-        param_specs: config for initializing the RNN parameters.
-        input_sample_shape: includes a single integer for the input sample
-            feature size.
-    '''
-
-    def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
-                 num_stacks=1, input_mode='linear', bidirectional=False,
-                 param_specs=None, input_sample_shape=None):
-        super(RNN, self).__init__(name)
-        conf = self.conf.rnn_conf
-        assert hidden_size > 0, 'Hidden feature size must > 0'
-        conf.hidden_size = hidden_size
-        assert rnn_mode in Set(['lstm', 'gru', 'tanh', 'relu']),  \
-            'rnn mode %s is not available' % (rnn_mode)
-        conf.rnn_mode = rnn_mode
-        conf.num_stacks = num_stacks
-        conf.dropout = dropout
-        conf.input_mode = input_mode
-        conf.direction = 'unidirectional'
-        if bidirectional:
-            conf.direction = 'bidirectional'
-        # currently only has rnn layer implemented using cudnn
-        _check_engine(engine, ['cudnn'])
-        if param_specs is None:
-            param_specs = {'name': name + '-weight',
-                           'init': 'uniform', 'low': 0, 'high': 1}
-        self.conf.param.extend([_construct_param_specs_from_dict(param_specs)])
-        self.param_specs.append(_construct_param_specs_from_dict(param_specs))
-
-        self.layer = singa_wrap.CudnnRNN()
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-    def forward(self, flag, inputs):
-        '''Forward inputs through the RNN.
-
-        Args:
-            flag, kTrain or kEval.
-            inputs, <x1, x2,...xn, hx, cx>, where xi is the input tensor for the
-                i-th position, its shape is (batch_size, input_feature_length);
-                the batch_size of xi must >= that of xi+1; hx is the initial
-                hidden state of shape (num_stacks * bidirection?2:1, batch_size,
-                hidden_size). cx is the initial cell state tensor of the same
-                shape as hy. cx is valid for only lstm. For other RNNs there is
-                no cx. Both hx and cx could be dummy tensors without shape and
-                data.
-
-        Returns:
-            <y1, y2, ... yn, hy, cy>, where yi is the output tensor for the i-th
-                position, its shape is (batch_size,
-                hidden_size * bidirection?2:1). hy is the final hidden state
-                tensor. cy is the final cell state tensor. cy is only used for
-                lstm.
-        '''
-        assert self.has_setup, 'Must call setup() before forward()'
-        assert len(inputs) > 1, 'The input to RNN must include at '\
-            'least one input tensor '\
-            'and one hidden state tensor (could be a dummy tensor)'
-        tensors = []
-        for t in inputs:
-            assert isinstance(t, tensor.Tensor), \
-                'input must be py Tensor %s' % (type(t))
-            tensors.append(t.singa_tensor)
-        y = self.layer.Forward(flag, tensors)
-        return tensor.from_raw_tensors(y)
-
-    def backward(self, flag, grad):
-        '''Backward gradients through the RNN.
-
-        Args:
-            flag, for future use.
-            grad, <dy1, dy2,...dyn, dhy, dcy>, where dyi is the gradient for the
-            i-th output, its shape is (batch_size, hidden_size*bidirection?2:1);
-                dhy is the gradient for the final hidden state, its shape is
-                (num_stacks * bidirection?2:1, batch_size,
-                hidden_size). dcy is the gradient for the final cell state.
-                dcy is valid only for lstm. For other RNNs there is
-                no dcy. Both dhy and dcy could be dummy tensors without shape and
-                data.
-
-        Returns:
-            <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
-                the i-th input, its shape is (batch_size,
-                input_feature_length). dhx is the gradient for the initial
-                hidden state. dcx is the gradient for the initial cell state,
-                which is valid only for lstm.
-        '''
-        tensors = []
-        for t in grad:
-            assert isinstance(t, tensor.Tensor), 'grad must be py Tensor'
-            tensors.append(t.singa_tensor)
-        ret = self.layer.Backward(flag, tensors)
-        return tensor.from_raw_tensors(ret[0]), tensor.from_raw_tensors(ret[1])
-
-
-class LSTM(RNN):
-    def __init__(self, name, hidden_size, dropout=0.0, num_stacks=1,
-                 input_mode='linear', bidirectional=False,
-                 param_specs=None, input_sample_shape=None):
-        super(LSTM, self).__init__(name, hidden_size,  'lstm',  dropout,
-                                   num_stacks, input_mode, bidirectional,
-                                   param_specs, input_sample_shape)
-
-
-class GRU(RNN):
-    def __init__(self, name, hidden_size, dropout=0.0, num_stacks=1,
-                 input_mode='linear', bidirectional=False, param_specs=None,
-                 input_sample_shape=None):
-        super(GRU, self).__init__(name,  hidden_size, 'gru',  dropout,
-                                  num_stacks, input_mode, bidirectional,
-                                  param_specs, input_sample_shape)
-
-
-def _check_engine(engine, allowed_engines):
-    assert engine.lower() in Set(allowed_engines), \
-           '%s is not a supported engine. Pls use one of %s' % \
-           (engine, ', '.join(allowed_engines))
-
-
-def _create_layer(eng, layer):
-    ''' create singa wrap layer.
-
-    Both arguments are case insensitive.
-    Args:
-        eng, implementation engine, either 'singa' or 'cudnn'
-        layer, layer type, e.g., 'convolution', 'pooling'; for activation
-        layers, use the specific activation mode, e.g. 'relu', 'tanh'.
-    '''
-    layer_type = eng + '_' + layer
-    return singa_wrap.CreateLayer(layer_type.lower())
-
-
-def _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad):
-    """Private function called by Convolution2D and Pooling2D."""
-    if isinstance(kernel, tuple):
-        conf.kernel_h = kernel[0]
-        conf.kernel_w = kernel[1]
-    else:
-        conf.kernel_h = kernel
-        conf.kernel_w = kernel
-    if isinstance(stride, tuple):
-        conf.stride_h = stride[0]
-        conf.stride_w = stride[1]
-    else:
-        conf.stride_h = stride
-        conf.stride_w = stride
-    mode = border_mode.lower()
-    if pad is None:
-        # TODO(wangwei) check the border mode
-        if mode == 'same':
-            assert conf.kernel_h % 2 == 1 and conf.kernel_w % 2 == 1, \
-                'Must use odd kernel for mode="same", kernel is (%d, %d)' % (
-                    conf.kernel_h, conf.kernel_w)
-            pad = (conf.kernel_h / 2, conf.kernel_w / 2)
-        elif mode == 'valid':
-            pad = (0, 0)
-        else:
-            assert False, ('Unsupported border_mode: %s. '
-                           'Please use {"valid", "same"}' % border_mode)
-        assert isinstance(pad, tuple), 'pad should be a tuple'
-    if isinstance(pad, tuple):
-        conf.pad_h = pad[0]
-        conf.pad_w = pad[1]
-    else:
-        conf.pad_h = pad
-        conf.pad_w = pad
-    return conf
-
-
-def _construct_param_specs_from_dict(specs):
-    """Conver the param specs from a dict into ParamSpec protobuf object.
-
-    Args:
-        specs (dict): the fields inlcude
-            'name' for parameter name
-            'lr_mult' for learning rate multiplier;
-            'decay_mult' for weight decay multiplier;
-            'init' for init method, which could be 'gaussian', 'uniform',
-            'xavier' and 'msra';
-            'std', 'mean', 'high', 'low' are used by corresponding init 
methods;
-            'constraint' for gradient constraint, value is a float threshold 
for
-                clampping the gradient.
-            'regularizer' for regularization, currently support 'l2', value is 
a
-                float for the coefficient.
-
-    Returns:
-        a ParamSpec object
-    """
-    conf = model_pb2.ParamSpec()
-    if 'name' in specs:
-        conf.name = specs['name']
-    if 'lr_mult' in specs:
-        conf.lr_mult = specs['lr_mult']
-    if 'decay_mult' in specs:
-        conf.decay_mult = specs['decay_mult']
-    if 'init' in specs:
-        filler = conf.filler
-        filler.type = specs['init'].lower()
-        if specs['init'].lower() == 'uniform':
-            assert 'low' in specs and 'high' in specs, \
-                'low and high are required for "uniform" init method'
-            filler.min = specs['low']
-            filler.max = specs['high']
-        elif specs['init'].lower() == 'gaussian':
-            assert 'mean' in specs and 'std' in specs, \
-                'std and mean are required for "gaussian" init method'
-            filler.mean = specs['mean']
-            filler.std = specs['std']
-        elif specs['init'].lower() == 'constant' and 'value' in specs:
-            filler.value = specs['value']
-    if 'regularizer' in specs:
-        conf.regularizer.coefficient = specs['regularizer']
-    if 'constraint' in specs:
-        conf.constraint.threshold = specs['constraint']
-    return conf
-
-
-def get_layer_list():
-    """ Return a list of strings which include the identifiers (tags) of all
-    supported layers
-    """
-    return singa_wrap.GetRegisteredLayers()
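
For reference, here is a minimal sketch of the param-spec dict that
_construct_param_specs_from_dict above consumes; the field names follow its
docstring, while the parameter name and all numeric values are purely
illustrative:

    # Hypothetical weight spec; every value is an example, not a recommendation.
    w_specs = {
        'name': 'dense1_weight',  # parameter name
        'init': 'gaussian',       # filler type: 'gaussian', 'uniform', 'xavier' or 'msra'
        'mean': 0.0,              # used by the 'gaussian' filler
        'std': 0.02,              # used by the 'gaussian' filler
        'lr_mult': 1.0,           # learning rate multiplier
        'decay_mult': 1.0,        # weight decay multiplier
        'regularizer': 1e-4,      # l2 coefficient
        'constraint': 10.0,       # gradient clamping threshold
    }
    # conf = _construct_param_specs_from_dict(w_specs)  # -> model_pb2.ParamSpec

Similarly, _set_kernel_stride_pad derives pad = (kernel_h / 2, kernel_w / 2)
for border_mode='same' (odd kernels only) and pad = (0, 0) for 'valid'.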

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py
deleted file mode 100644
index c88290b..0000000
--- a/src/python/singa/loss.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-Loss module includes a set of training loss implementations. Some are converted
-from the C++ implementation, and the rest are implemented directly using python
-Tensor.
-
-Example usage::
-
-    import numpy as np
-    from singa import tensor
-    from singa import loss
-    from singa.proto import model_pb2
-
-    x = tensor.Tensor((3, 5))
-    x.uniform(0, 1)  # randomly generate the prediction activation
-    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
-
-    f = loss.SoftmaxCrossEntropy()
-    l = f.forward(model_pb2.kTrain, x, y)  # l is a tensor with 3 loss values
-    g = f.backward()  # g is a tensor with the gradients of l w.r.t. x
-'''
-
-
-from . import singa_wrap as singa
-import tensor
-
-
-class Loss(object):
-    '''Base loss class.
-
-    Subclasses that wrap the C++ loss classes can use the inherited forward,
-    backward, and evaluate functions of this base class. Other subclasses need
-    to override these functions.
-    '''
-
-    def __init__(self):
-        self.swig_loss = None
-
-    def forward(self, flag, x, y):
-        '''Compute the loss values.
-
-        Args:
-            flag (int): kTrain or kEval. If it is kTrain, then the backward
-                function must be called before calling forward again.
-            x (Tensor): the prediction Tensor
-            y (Tensor): the ground truth Tensor; x.shape[0] must equal y.shape[0]
-
-        Returns:
-            a tensor of floats for the loss values, one per sample
-        '''
-        return tensor.from_raw_tensor(
-            self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
-
-    def backward(self):
-        '''
-        Returns:
-            the gradient of the loss w.r.t. x
-        '''
-        return tensor.from_raw_tensor(self.swig_loss.Backward())
-
-    def evaluate(self, flag, x, y):  # TODO(wangwei) remove flag
-        '''
-        Args:
-            flag (int): must be kEval, to be removed
-            x (Tensor): the prediction Tensor
-            y (Tensor): the ground truth Tensor
-
-        Returns:
-            the averaged loss for all samples in x.
-        '''
-        return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
-
-
-class SoftmaxCrossEntropy(Loss):
-    '''This loss function is a combination of SoftMax and Cross-Entropy loss.
-
-    It converts the inputs via SoftMax function and then
-    computes the cross-entropy loss against the ground truth values.
-    '''
-
-    def __init__(self):
-        self.swig_loss = singa.SoftmaxCrossEntropy()
-
-
-class SquaredError(Loss):
-    '''This loss evaluates the squared error between the prediction and the
-    truth values.
-
-    It is implemented using Python Tensor operations.
-    '''
-    def __init__(self):
-        super(SquaredError, self).__init__()
-        self.err = None
-
-    def forward(self, flag, x, y):
-        '''Compute the error as 0.5 * ||x-y||^2.
-
-        Args:
-            flag (int): kTrain or kEval; if kTrain, then the backward must be
-                called before calling forward again.
-            x (Tensor): the prediction Tensor
-            y (Tensor): the ground truth Tensor, of the same shape as x (the
-                error is computed elementwise as x - y)
-
-        Returns:
-            a Tensor with one error value per sample
-        '''
-        self.err = x - y
-        return 0.5 * tensor.squared(self.err)
-
-    def backward(self):
-        '''Compute the gradient of the error w.r.t. x.
-
-        Returns:
-            x - y
-        '''
-        return self.err
-
-    def evaluate(self, flag, x, y):
-        '''Compute the averaged error.
-
-        Returns:
-            a float value as the averaged error
-        '''
-        return tensor.sum(0.5 * tensor.squared(x - y)) / x.size()
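
A short usage sketch for SquaredError above, in the same style as the module
docstring; the shapes and values are illustrative only:

    import numpy as np
    from singa import tensor
    from singa import loss
    from singa.proto import model_pb2

    x = tensor.from_numpy(np.array([[0.1, 0.9], [0.8, 0.2]], dtype=np.float32))
    y = tensor.from_numpy(np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.float32))

    f = loss.SquaredError()
    l = f.forward(model_pb2.kTrain, x, y)   # 0.5 * (x - y)^2, elementwise
    g = f.backward()                        # x - y
    e = f.evaluate(model_pb2.kEval, x, y)   # averaged error over x.size() values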

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/metric.py
----------------------------------------------------------------------
diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py
deleted file mode 100644
index 3a5750d..0000000
--- a/src/python/singa/metric.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-'''This module includes a set of metric classes for evaluating the model's
-performance. The specific metric classes could be converted from C++
-implementation or implemented directly using Python.
-
-
-Example usage::
-
-    import numpy as np
-    from singa import tensor
-    from singa import metric
-
-    x = tensor.Tensor((3, 5))
-    x.uniform(0, 1)  # randomly generate the prediction activation
-    x = tensor.SoftMax(x)  # normalize the prediction into probabilities
-    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
-
-    f = metric.Accuracy()
-    acc = f.evaluate(x, y)  # averaged accuracy over all 3 samples in x
-
-'''
-
-from . import singa_wrap as singa
-import tensor
-
-
-class Metric(object):
-    '''Base metric class.
-
-    Subclasses that wrap the C++ metric classes can use the inherited forward
-    and evaluate functions of this base class. Other subclasses need
-    to override these functions. Users need to feed in the **predictions** and
-    ground truth to get the metric values.
-    '''
-
-    def __init__(self):
-        self.swig_metric = None
-
-    def forward(self, x, y):
-        '''Compute the metric for each sample.
-
-        Args:
-            x (Tensor): predictions, one row per sample
-            y (Tensor): ground truth values, one row per sample
-
-        Returns:
-            a tensor of floats, one per sample
-        '''
-        return tensor.from_raw_tensor(
-            self.swig_metric.Forward(x.singa_tensor, y.singa_tensor))
-
-    def evaluate(self, x, y):
-        '''Compute the averaged metric over all samples.
-
-        Args:
-            x (Tensor): predictions, one row per sample
-            y (Tensor): ground truth values, one row per sample
-        Returns:
-            a float value for the averaged metric
-        '''
-        return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor)
-
-
-class Accuracy(Metric):
-    '''Compute the top-1 accuracy for single label prediction tasks.
-
-    It calls the C++ functions to do the calculation.
-    '''
-    def __init__(self):
-        self.swig_metric = singa.Accuracy()
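
Besides the wrapped C++ Accuracy, the Metric base class above can be
subclassed in pure Python. Below is a hypothetical numpy-based
re-implementation of top-1 accuracy (not part of this module), assuming
tensor.to_numpy and tensor.from_numpy as used elsewhere in this tree:

    import numpy as np
    from singa import tensor
    from singa import metric

    class NumpyAccuracy(metric.Metric):
        '''Hypothetical python-only metric, for illustration only.'''

        def forward(self, x, y):
            # one value per sample: 1.0 if the argmax prediction matches the label
            pred = np.argmax(tensor.to_numpy(x), axis=1)
            truth = tensor.to_numpy(y).astype(int).reshape(-1)
            return tensor.from_numpy((pred == truth).astype(np.float32))

        def evaluate(self, x, y):
            # averaged accuracy over all samples
            pred = np.argmax(tensor.to_numpy(x), axis=1)
            truth = tensor.to_numpy(y).astype(int).reshape(-1)
            return float(np.mean(pred == truth))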

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/model.py
----------------------------------------------------------------------
diff --git a/src/python/singa/model.py b/src/python/singa/model.py
deleted file mode 100644
index 38d9950..0000000
--- a/src/python/singa/model.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#/**
-# * Licensed to the Apache Software Foundation (ASF) under one
-# * or more contributor license agreements.  See the NOTICE file
-# * distributed with this work for additional information
-# * regarding copyright ownership.  The ASF licenses this file
-# * to you under the Apache License, Version 2.0 (the
-# * "License"); you may not use this file except in compliance
-# * with the License.  You may obtain a copy of the License at
-# *
-# *     http://www.apache.org/licenses/LICENSE-2.0
-# *
-# * Unless required by applicable law or agreed to in writing, software
-# * distributed under the License is distributed on an "AS IS" BASIS,
-# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# * See the License for the specific language governing permissions and
-# * limitations under the License.
-# */
-
-class Model(object):
-    pass
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d76caea3/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
deleted file mode 100644
index 0026953..0000000
--- a/src/python/singa/net.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-"""
-Neural net class for constructing nets using layers and providing access
-functions for net info, e.g., parameters.
-"""
-
-
-from .proto.model_pb2 import kTrain, kEval
-import tensor
-import layer
-import cPickle as pickle
-
-
-class FeedForwardNet(object):
-
-    def __init__(self, loss=None, metric=None):
-        self.loss = loss
-        self.metric = metric
-        self.layers = []
-        self.src_of_layer = {}
-        self.dst_of_layer = None
-        self.ordered_layers = None
-
-    def to_device(self, dev):
-        for lyr in self.layers:
-            lyr.to_device(dev)
-
-    def add(self, lyr, src=None):
-        """Append a layer into the layer list.
-
-        This function will get the sample shape from the last layer to set up
-        the newly added layer. For the first layer, it is set up outside.
-        The calling function should ensure the correctness of the layer order.
-
-        Args:
-            lyr (Layer): the layer to be added
-            src (Layer or list of Layer): the source layer(s) of lyr; if None,
-                the last added layer is used as the source
-        """
-        if src is not None:
-            if isinstance(src, layer.Layer):
-                assert src.has_setup is True, 'the source layer must be set up'
-                self.src_of_layer[lyr.name] = [src]
-            else:
-                assert type(src) == list, 'the src must be a list of layers'
-                self.src_of_layer[lyr.name] = src
-                # print 'merge------', len(src)
-        else:
-            assert len(self.layers) > 0 or lyr.has_setup, \
-                'Source layers are needed to set up this layer'
-            if len(self.layers) > 0:
-                self.src_of_layer[lyr.name] = [self.layers[-1]]
-            else:
-                self.src_of_layer[lyr.name] = []
-        if lyr.has_setup is False:
-            # print shape
-            in_shape = self.src_of_layer[lyr.name][0].get_output_sample_shape()
-            lyr.setup(in_shape)
-            print lyr.name, lyr.get_output_sample_shape()
-        self.layers.append(lyr)
-        return lyr
-
-    def param_values(self):
-        values = []
-        layers = self.layers
-        if self.ordered_layers is not None:
-            layers = self.ordered_layers
-        for lyr in layers:
-            values.extend(lyr.param_values())
-        return values
-
-    def param_specs(self):
-        specs = []
-        layers = self.layers
-        if self.ordered_layers is not None:
-            layers = self.ordered_layers
-        for lyr in layers:
-            specs.extend(lyr.param_specs)
-        return specs
-
-    def param_names(self):
-        return [spec.name for spec in self.param_specs()]
-
-    def train(self, x, y):
-        out = self.forward(kTrain, x)
-        l = self.loss.forward(kTrain, out, y)
-        m = None
-        if self.metric is not None:
-            m = self.metric.evaluate(out, y)
-        return self.backward(), (l.l1(), m)
-
-    def evaluate(self, x, y):
-        """Evaluate the loss and metric of the given data"""
-        out = self.forward(kEval, x)
-        l = None
-        m = None
-        assert self.loss is not None or self.metric is not None,\
-            'Cannot do evaluation, as neither loss nor metric is set'
-        if self.loss is not None:
-            l = self.loss.evaluate(kEval, out, y)
-        if self.metric is not None:
-            m = self.metric.evaluate(out, y)
-        return l, m
-
-    def predict(self, x):
-        xx = self.forward(kEval, x)
-        return tensor.softmax(xx)
-
-    def topo_sort(self, cur, src_of_layer, visited=None, order=None):
-        if visited is None:
-            visited = {}
-            for name in src_of_layer.keys():
-                visited[name] = False
-            order = []
-        srcs = src_of_layer[cur.name]
-        for src in srcs:
-            if visited[src.name] is False:
-                visited[src.name] = True
-                self.topo_sort(src, src_of_layer, visited, order)
-        order.append(cur)
-        visited[cur.name] = True
-        return order
-
-    def forward(self, flag, x):
-        # print x.l1()
-        if self.ordered_layers is None:
-            self.ordered_layers = self.topo_sort(self.layers[-1],
-                                                 self.src_of_layer)
-        inputs = [x]
-        output_of_layer = {}
-        for cur in self.ordered_layers:
-            srcs = self.src_of_layer[cur.name]
-            disp_src = cur.name + '<--'
-            for src in srcs:
-                outs = output_of_layer[src.name]
-                if type(outs) == list:
-                    inputs.append(outs[0])
-                else:
-                    inputs.append(outs)
-                disp_src += '+' + src.name
-                # del output_of_layer[src.name]
-            # print disp_src
-            if len(inputs) == 1:
-                inputs = inputs[0]
-            output_of_layer[cur.name] = cur.forward(flag, inputs)
-            inputs = []
-            # print lyr.name, x.l1()
-        # print output_of_layer
-        return output_of_layer[self.ordered_layers[-1].name]
-
-    def backward(self):
-        if self.dst_of_layer is None:
-            self.dst_of_layer = {}
-            for cur in self.layers:
-                self.dst_of_layer[cur.name] = []
-            for cur in self.ordered_layers[1:]:
-                srcs = self.src_of_layer[cur.name]
-                for src in srcs:
-                    self.dst_of_layer[src.name].append(cur)
-        grad = self.loss.backward()
-        if len(grad.shape) > 1:
-            grad /= grad.shape[0]  # average across the batch
-        # print 'grad', grad.l1()
-        grads = [grad]
-        output_of_layer = {}
-        pgrads = []
-        for cur in reversed(self.ordered_layers):
-            for dst in self.dst_of_layer[cur.name]:
-                outputs = output_of_layer[dst.name]
-                if type(outputs) == list:
-                    grads.append(outputs[0])
-                else:
-                    grads.append(outputs)
-                # del output_of_layer[dst.name]
-            if len(grads) == 1:
-                grads = grads[0]
-            outs, _pgrads = cur.backward(kTrain, grads)
-            pgrads.append(_pgrads)
-            output_of_layer[cur.name] = outs
-            grads = []
-
-        ret = []
-        for pgrad in reversed(pgrads):
-            ret.extend(pgrad)
-        return ret
-
-    def save(self, f):
-        """Save model parameters using cpickle"""
-        params = {}
-        for (specs, val) in zip(self.param_specs(), self.param_values()):
-            val.to_host()
-            params[specs.name] = tensor.to_numpy(val)
-        with open(f, 'wb') as fd:
-            pickle.dump(params, fd)
-
-    def load(self, f):
-        """Load model parameters using cpickle"""
-        with open(f, 'rb') as fd:
-            params = pickle.load(fd)
-        for (specs, val) in zip(self.param_specs(), self.param_values()):
-            val.copy_from_numpy(params[specs.name])
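
To tie the pieces together, a minimal sketch of how FeedForwardNet above is
intended to be used. It assumes a layer.Dense class taking (name, num_output,
input_sample_shape) from this tree's layer module, which is not shown in this
diff; names, shapes and data are illustrative only, and depending on the build
the layer engine may need to be configured first:

    import numpy as np
    from singa import layer, loss, metric, tensor
    from singa import net as ffnet

    net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
    net.add(layer.Dense('fc1', 64, input_sample_shape=(32,)))
    net.add(layer.Dense('fc2', 10))   # input shape inferred from 'fc1'

    x = tensor.Tensor((8, 32))
    x.uniform(-1, 1)                                    # a random mini-batch
    y = tensor.from_numpy(np.arange(8, dtype=np.int))   # one label per sample

    grads, (l, a) = net.train(x, y)   # param gradients, loss (l1 norm), accuracy
    l_eval, a_eval = net.evaluate(x, y)
    net.save('params.pkl')            # pickle the parameter values to disk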
