Repository: incubator-singa Updated Branches: refs/heads/master 9b2d16134 -> f2ad93381
SINGA-300 - Add residual networks for imagenet classification Add the wide residual network for imagenet serving; refer to https://github.com/szagoruyko/wide-residual-networks convert original resnet from torch to singa. tested wide resnet; Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c0317d18 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c0317d18 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c0317d18 Branch: refs/heads/master Commit: c0317d185004ea594f19b74cbb9bc2c97e66a7b1 Parents: 59ca44a Author: Wei Wang <[email protected]> Authored: Fri Feb 10 14:32:42 2017 +0800 Committer: Wei Wang <[email protected]> Committed: Sun Feb 12 22:00:46 2017 +0800 ---------------------------------------------------------------------- examples/imagenet/resnet/convert.py | 48 +++++++++++ examples/imagenet/resnet/model.py | 138 ++++++++++++++++++++++++++++++ examples/imagenet/resnet/serve.py | 138 ++++++++++++++++++++++++++++++ python/singa/device.py | 4 +- python/singa/layer.py | 33 +++---- python/singa/net.py | 34 +++++--- src/model/layer/convolution.cc | 8 +- src/model/layer/convolution.h | 5 +- src/model/layer/cudnn_convolution.cc | 8 +- src/model/layer/dense.cc | 16 ++-- src/model/layer/dense.h | 7 +- 11 files changed, 398 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/examples/imagenet/resnet/convert.py ---------------------------------------------------------------------- diff --git a/examples/imagenet/resnet/convert.py b/examples/imagenet/resnet/convert.py new file mode 100644 index 0000000..c976bf5 --- /dev/null +++ b/examples/imagenet/resnet/convert.py @@ -0,0 +1,48 @@ +import torchfile +import numpy as np +import cPickle as pickle + +def conv(m, name, params): + outplane = m['weight'].shape[0] + params[name + '-conv_weight'] = np.reshape(m['weight'], (outplane, -1)) + return params + +def batchnorm(m, name, params): + params[name + '-bn_gamma'] = m['weight'] + params[name + '-bn_beta'] = m['bias'] + params[name + '-bn_mean'] = m['running_mean'] + params[name + '-bn_var'] = m['running_var'] + return params + +def block(m, name, params, has_identity): + branch=m[0].modules[0].modules + params = conv(branch[0], name + '-1', params) + params = batchnorm(branch[1], name + '-1', params) + params = conv(branch[3], name + '-2', params) + params = batchnorm(branch[4], name + '-2', params) + params = conv(branch[6], name + '-3', params) + params = batchnorm(branch[7], name + '-3', params) + if not has_identity: + shortcut = m[0].modules[1].modules + params = conv(shortcut[0], name + '-shortcut', params) + params = batchnorm(shortcut[1], name + '-shortcut', params) + return params + +def stage(sid, m, num_blk, params): + for i in range(num_blk): + params = block(m[i].modules, 'stage%d-blk%d' % (sid, i), params, i!=0) + return params + +params = {} +model = torchfile.load('wrn-50-2.t7').modules +params = conv(model[0], 'input', params) +params = batchnorm(model[1], 'input', params) +params = stage(0, model[4].modules, 3, params) +params = stage(1, model[5].modules, 4, params) +params = stage(2, model[6].modules, 6, params) +params = stage(3, model[7].modules, 3, params) + +params['dense_weight'] = np.transpose(model[10]['weight']) +params['dense_bias'] = model[10]['bias'] +with open('wrn-50-2.pickle', 'wb') as fd: + pickle.dump(params, fd) 
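A quick way to validate the conversion above is to reload the generated pickle and check a few parameter shapes. This is only a hedged sketch, not part of the patch: it assumes convert.py has already been run against the released wrn-50-2.t7 and that wrn-50-2.pickle sits in the working directory; the shapes in the comments are illustrative.

# Sanity-check the parameter dict written by convert.py (assumes wrn-50-2.pickle exists).
import cPickle as pickle

with open('wrn-50-2.pickle', 'rb') as fd:
    params = pickle.load(fd)

print 'number of params:', len(params)
# conv weights are stored as 2-D matrices (outplane, inplane * kernel_h * kernel_w),
# matching the reshape in conv() above; e.g. the 7x7x3 input conv becomes (64, 147).
print 'input-conv_weight', params['input-conv_weight'].shape
# the dense weight was transposed to (in_features, num_classes) to fit SINGA's Dense layer.
print 'dense_weight', params['dense_weight'].shape
for name in sorted(params)[:5]:
    print name, params[name].shape
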
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/examples/imagenet/resnet/model.py ---------------------------------------------------------------------- diff --git a/examples/imagenet/resnet/model.py b/examples/imagenet/resnet/model.py new file mode 100644 index 0000000..7c9a3cf --- /dev/null +++ b/examples/imagenet/resnet/model.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +''' This model is created following https://github.com/facebook/fb.resnet.torch.git +''' +from singa.layer import Conv2D, Activation, MaxPooling2D, AvgPooling2D,\ + Split, Merge, Flatten, Dense, BatchNormalization, Softmax +from singa import net as ffnet +from singa import initializer + +ffnet.verbose=True + +conv_bias = False + +def conv(net, prefix, n, ksize, stride=1, pad=0, bn=True, relu=True, src=None): + ret = net.add(Conv2D( + prefix + '-conv', n, ksize, stride, pad=pad, use_bias=conv_bias), src) + if bn: + ret = net.add(BatchNormalization(prefix + '-bn')) + if relu: + ret = net.add(Activation(prefix + '-relu')) + return ret + + +def shortcut(net, prefix, inplane, outplane, stride, src): + if inplane == outplane: + return src + return conv(net, prefix + '-shortcut', outplane, 1, stride, 0, True, False, src) + + +def bottleneck(name, net, inplane, midplane, outplane, stride=1, preact=False): + split = net.add(Split(name + '-split', 2)) + conv(net, name + '-1', midplane, 1, 1, 0, True, True, src=split) + conv(net, name + '-2', midplane, 3, stride, 1, True, True) + br0 = conv(net, name + '-3', outplane, 1, 1, 0, True, False) + br1 = shortcut(net, name, inplane, outplane, stride, split) + net.add(Merge(name + '-add'), [br0, br1]) + return net.add(Activation(name + '-relu')) + +def basicblock(name, net, inplane, midplane, outplane, stride=1, preact=False): + assert midplane==outplane, 'midplan and outplane should be the same' + split = net.add(Split(name + '-split', 2)) + if preact: + net.add(BatchNormalization(name + '-preact-bn'), split) + net.add(Activation(name + '-preact-relu')) + conv(net, name + '-1', outplane, 3, stride, 1, True, True, split) + br0 = conv(net, name + '-2', outplane, 3, 1, 1, True, False) + br1 = shortcut(net, name, inplane, outplane, stride, split) + net.add(Merge(name + '-add'), [br0, br1]) + return net.add(Activation(name + '-add-relu')) + + +def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block): + block('stage%d-blk%d' % (sid, 0), net, inplane, midplane, outplane, stride) + for i in range(1, num_blk): + block('stage%d-blk%d' % (sid, i), net, outplane, midplane, outplane) + +def init_params(net, weight_path): + if weight_path == None: + for pname, pval in zip(net.param_names(), net.param_values()): + print pname, 
pval.shape + if 'conv' in pname and len(pval.shape) > 1: + initializer.gaussian(pval, 0, pval.shape[1]) + elif 'dense' in pname: + if len(pval.shape) > 1: + initializer.gaussian(pval, 0, pval.shape[0]) + else: + pval.set_value(0) + # init params from batch norm layer + elif 'mean' in pname or 'beta' in pname: + pval.set_value(0) + elif 'var' in pname: + pval.set_value(1) + elif 'gamma' in pname: + initializer.uniform(pval, 0, 1) + else: + net.load(weight_path, use_pickle = 'pickle' in weight_path) +def create_resnet(weight_path=None, depth=50): + cfg = { + 50: ([3, 4, 6, 3], bottleneck), + 101: ([3, 4, 23, 3], bottleneck), + 152: ([3, 8, 36, 3], bottleneck), + } + net = ffnet.FeedForwardNet() + net.add(Conv2D('input-conv', 64, 7, 2, pad=3, input_sample_shape=(3, 224, 224))) + net.add(BatchNormalization('input-bn')) + net.add(Activation('input_relu')) + net.add(MaxPooling2D('input_pool', 3, 2, pad=1)) + + conf = cfg[depth] + stage(0, net, conf[0][0], 64, 64, 256, 1, conf[1]) + stage(1, net, conf[0][1], 256, 128, 512, 2, conf[1]) + stage(2, net, conf[0][2], 512, 256, 1024, 2, conf[1]) + stage(3, net, conf[0][3], 1024, 512, 2048, 2, conf[1]) + net.add(AvgPooling2D('avg', 7, 1)) + net.add(Flatten('flat')) + net.add(Dense('dense', 1000)) + + init_params(net, weight_path) + return net + + +def create_wide_resnet(weight_path=None): + net = ffnet.FeedForwardNet() + net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224))) + net.add(BatchNormalization('input-bn')) + net.add(Activation('input_relu')) + net.add(MaxPooling2D('input_pool', 3, 2, pad=1)) + + stage(0, net, 3, 64, 128, 256, 1, bottleneck) + stage(1, net, 4, 256, 256, 512, 2, bottleneck) + stage(2, net, 6, 512, 512, 1024, 2, bottleneck) + stage(3, net, 3, 1024, 1024, 2048, 2, bottleneck) + + net.add(AvgPooling2D('avg_pool', 7, 1, pad=0)) + net.add(Flatten('flat')) + net.add(Dense('dense', 1000)) + + init_params(net, weight_path) + return net + + +if __name__ == '__main__': + create_wide_resnet('wrn-50-2.pickle') http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/examples/imagenet/resnet/serve.py ---------------------------------------------------------------------- diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py new file mode 100644 index 0000000..d9609ce --- /dev/null +++ b/examples/imagenet/resnet/serve.py @@ -0,0 +1,138 @@ +import os +import sys +import time +import numpy as np +import threading +import traceback +from scipy.misc import imread, imresize +from argparse import ArgumentParser + +from singa import device +from singa import tensor +from singa import data +from singa import image_tool +from singa import metric +from rafiki.agent import Agent, MsgType +import model + +tool = image_tool.ImageTool() +num_augmentation = 10 +crop_size = 224 +mean = np.array([0.485, 0.456, 0.406]) +std = np.array([ 0.229, 0.224, 0.225]) +def image_transform(img): + '''Input an image path and return a set of augmented images (type Image)''' + global tool + return tool.load(img).resize_by_list([256]).crop5((crop_size, crop_size), 5).flip(2).get() + + +def predict(net, images, num=10): + '''predict probability distribution for one net. + + Args: + net: neural net (vgg or resnet) + images: a batch of augmented images (type numpy) + num: num of augmentations + ''' + prob = net.predict(images) + prob = tensor.to_numpy(prob) + prob = prob.reshape((images.shape[0] / num, num, -1)) + prob = np.average(prob, 1) + return prob + + +def allowed_file(filename): + return '.'
in filename and filename.rsplit('.', 1)[1] in \ + ["PNG", "png", "jpg", "JPG", "JPEG", "jpeg"] + + +def serve(net, label_map, dev, agent, topk=5): + '''Serve to predict image labels. + + It prints the topk food names for each image. + + Args: + label_map: a list of food names, corresponding to the index in meta_file + ''' + + images =tensor.Tensor((num_augmentation, 3, crop_size, crop_size), dev) + while True: + msg, val = agent.pull() + if msg is None: + time.sleep(0.1) + continue + msg = MsgType.parse(msg) + if msg.is_request(): + try: + # process images + im = [np.array(x.convert('RGB'), dtype=np.float32).transpose(2, 0, 1) for x in image_transform(val['image'])] + im = np.array(im) / 256 + im -= mean[np.newaxis, :, np.newaxis, np.newaxis] + im /= std[np.newaxis, :, np.newaxis, np.newaxis] + images.copy_from_numpy(im) + print "input: ", images.l1() + # do prediction + prob = predict(net, images, num_augmentation)[0] + idx = np.argsort(-prob) + # prepare results + response = "" + for i in range(topk): + response += "%s:%f <br/>" % (label_map[idx[i]], prob[idx[i]]) + except: + traceback.print_exc() + response = "sorry, system error during prediction." + agent.push(MsgType.kResponse, response) + elif msg.is_command(): + if MsgType.kCommandStop.equal(msg): + print 'get stop command' + agent.push(MsgType.kStatus, "success") + break + else: + print 'get unsupported command %s' % str(msg) + agent.push(MsgType.kStatus, "Unknown command") + else: + print 'get unsupported message %s' % str(msg) + agent.push(MsgType.kStatus, "unsupported msg; going to shutdown") + break + print "server stop" + +def main(): + try: + # Setup argument parser + parser = ArgumentParser(description="Wide residual network") + + parser.add_argument("-p", "--port", default=9999, help="listen port") + parser.add_argument("-c", "--use_cpu", action="store_true", + help="If set, load models onto CPU devices") + parser.add_argument("--parameter_file", default="wrn-50-2.pickle") + + # Process arguments + args = parser.parse_args() + port = args.port + + # start to train + agent = Agent(port) + + net = model.create_wide_resnet(args.parameter_file) + dev = device.create_cuda_gpu() + net.to_device(dev) + print 'Finish loading models' + + labels = np.loadtxt('synset_words.txt', str, delimiter='\t ') + serve(net, labels, dev, agent) + + # acc = evaluate(net, '../val_list.txt', 'image/val', dev) + # print acc + + # wait the agent finish handling http request + agent.stop() + except SystemExit: + return + except: + traceback.print_exc() + sys.stderr.write(" for help use --help \n\n") + return 2 + + +if __name__ == '__main__': + main() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/python/singa/device.py ---------------------------------------------------------------------- diff --git a/python/singa/device.py b/python/singa/device.py index 1df4c84..fdd2a92 100644 --- a/python/singa/device.py +++ b/python/singa/device.py @@ -132,12 +132,12 @@ def create_cuda_gpu_on(device_id): def create_opencl_device(): '''Create the default OpenCL device. - + Returns: a swig converted OpenCL device. ''' assert singa.USE_OPENCL, 'SINGA has not been compiled with OpenCL enabled.' 
- return singa.Platform.GetDefaultDevice() + return singa.Platform.GetDefaultOpenclDevice() default_device = singa.Platform.GetDefaultDevice() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/python/singa/layer.py ---------------------------------------------------------------------- diff --git a/python/singa/layer.py b/python/singa/layer.py index 0bea2d2..7975042 100644 --- a/python/singa/layer.py +++ b/python/singa/layer.py @@ -337,18 +337,19 @@ class Conv2D(Layer): # conf.data_format = data_format if W_specs is None: W_specs = {'init': 'xavier'} - if b_specs is None: - b_specs = {'init': 'constant'} if 'name' not in W_specs: W_specs['name'] = name + '_weight' - if 'name' not in b_specs: - b_specs['name'] = name + '_bias' wspecs = _construct_param_specs_from_dict(W_specs) self.conf.param.extend([wspecs]) self.param_specs.append(wspecs) - bspecs = _construct_param_specs_from_dict(b_specs) - self.conf.param.extend([bspecs]) - self.param_specs.append(bspecs) + if use_bias: + if b_specs is None: + b_specs = {'init': 'constant'} + if 'name' not in b_specs: + b_specs['name'] = name + '_bias' + bspecs = _construct_param_specs_from_dict(b_specs) + self.conf.param.extend([bspecs]) + self.param_specs.append(bspecs) _check_engine(engine, ['cudnn', 'singacpp', 'singacl']) self.layer = _create_layer(engine, 'Convolution') @@ -610,16 +611,19 @@ class Dense(Layer): conf.transpose = W_transpose if W_specs is None: W_specs = {'init': 'xavier'} - if b_specs is None: - b_specs = {'init': 'constant', 'value': 0} if 'name' not in W_specs: W_specs['name'] = name + '_weight' - if 'name' not in b_specs: - b_specs['name'] = name + '_bias' wspecs = _construct_param_specs_from_dict(W_specs) - bspecs = _construct_param_specs_from_dict(b_specs) - self.conf.param.extend([wspecs, bspecs]) - self.param_specs.extend([wspecs, bspecs]) + self.conf.param.extend([wspecs]) + self.param_specs.append(wspecs) + if use_bias: + if b_specs is None: + b_specs = {'init': 'constant', 'value': 0} + if 'name' not in b_specs: + b_specs['name'] = name + '_bias' + bspecs = _construct_param_specs_from_dict(b_specs) + self.conf.param.extend([bspecs]) + self.param_specs.append(bspecs) # dense layer is transparent to engine. if engine == 'cudnn': self.layer = _create_layer('singacuda', 'Dense') @@ -775,7 +779,6 @@ class Split(Layer): input_sample_shape: includes a single integer for the input sample feature size. 
''' - def __init__(self, name, num_output, input_sample_shape=None): self.num_output = num_output self.in_shape = input_sample_shape http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/python/singa/net.py ---------------------------------------------------------------------- diff --git a/python/singa/net.py b/python/singa/net.py index 027e78c..26fb61d 100644 --- a/python/singa/net.py +++ b/python/singa/net.py @@ -386,16 +386,16 @@ class FeedForwardNet(object): ''' if use_pickle: params = {} - for (specs, val) in zip(self.param_specs(), self.param_values()): + for (name, val) in zip(self.param_names(), self.param_values()): val.to_host() - params[specs.name] = tensor.to_numpy(val) + params[name] = tensor.to_numpy(val) with open(f, 'wb') as fd: pickle.dump(params, fd) else: sp = snapshot.Snapshot(f, True, buffer_size) - for (specs, val) in zip(self.param_specs(), self.param_values()): + for (name, val) in zip(self.param_names(), self.param_values()): val.to_host() - sp.write(specs.name, val) + sp.write(name, val) def load(self, f, buffer_size=10, use_pickle=False): '''Load model parameters using io/snapshot. @@ -407,18 +407,30 @@ class FeedForwardNet(object): 'then set use_pickle=False for loading it' with open(f, 'rb') as fd: params = pickle.load(fd) - for (specs, val) in zip(self.param_specs(), - self.param_values()): + for name, val in zip(self.param_names(), self.param_values()): + if name not in params: + print 'Param: %s missing in the checkpoint file' % name + continue try: - val.copy_from_numpy(params[specs.name]) + val.copy_from_numpy(params[name]) except AssertionError as err: - print 'Error from copying values for param: %s' % specs.name - print 'shape of param vs checkpoint', val.shape, params[specs.name].shape + print 'Error from copying values for param: %s' % name + print 'shape of param vs checkpoint', \ + val.shape, params[name].shape raise err else: print 'NOTE: If your model was saved using pickle, '\ 'then set use_pickle=True for loading it' sp = snapshot.Snapshot(f, False, buffer_size) params = sp.read() - for (specs, val) in zip(self.param_specs(), self.param_values()): - val.copy_data(params[specs.name]) + for (name, val) in zip(self.param_names(), self.param_values()): + if name not in params: + print 'Param: %s missing in the checkpoint file' % name + continue + try: + val.copy_data(params[name]) + except AssertionError as err: + print 'Error from copying values for param: %s' % name + print 'shape of param vs checkpoint', \ + val.shape, params[name].shape + raise err http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/convolution.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc index 78ec1af..8940fb2 100644 --- a/src/model/layer/convolution.cc +++ b/src/model/layer/convolution.cc @@ -97,7 +97,8 @@ void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) { // Setup shape of weight_ and bias_ weight_.Reshape(Shape{num_filters_, col_height_}); - bias_.Reshape(Shape{num_filters_}); + if (bias_term_) + bias_.Reshape(Shape{num_filters_}); // Assume the order of param is: weight, bias for (const auto &spec : conf.param()) param_specs_.push_back(spec); } @@ -143,7 +144,6 @@ const std::pair<Tensor, vector<Tensor>> Convolution::Backward( Tensor dx; Tensor db, dw; dx.ResetLike(src_data); - db.ResetLike(bias_); dw.ResetLike(weight_); dw.SetValue(0.0f); size_t batchsize = grad.shape(0); @@ -156,6 +156,7 @@ const 
std::pair<Tensor, vector<Tensor>> Convolution::Backward( SumColumns(tmp1, &tmp2); Tensor tmp3 = Reshape(tmp2, Shape{batchsize, num_filters_}); + db.ResetLike(bias_); SumRows(tmp3, &db); } @@ -178,7 +179,8 @@ const std::pair<Tensor, vector<Tensor>> Convolution::Backward( dx.CopyDataFromHostPtr(dx_b, imagesize, b * imagesize); } param_grad.push_back(dw); - param_grad.push_back(db); + if (bias_term_) + param_grad.push_back(db); delete[] data_col; delete[] dx_b; return std::make_pair(dx, param_grad); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/convolution.h ---------------------------------------------------------------------- diff --git a/src/model/layer/convolution.h b/src/model/layer/convolution.h index 7b7fd00..89b5319 100644 --- a/src/model/layer/convolution.h +++ b/src/model/layer/convolution.h @@ -57,7 +57,10 @@ class Convolution : public Layer { const int stride_w, float* data_im); const std::vector<Tensor> param_values() override { - return std::vector<Tensor>{weight_, bias_}; + if (bias_term_) + return std::vector<Tensor>{weight_, bias_}; + else + return std::vector<Tensor>{weight_}; } size_t kernel_w() const { return kernel_w_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/cudnn_convolution.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc index 196d137..03ad8b9 100644 --- a/src/model/layer/cudnn_convolution.cc +++ b/src/model/layer/cudnn_convolution.cc @@ -60,7 +60,8 @@ void CudnnConvolution::InitCudnn(const Tensor &input) { size_t batchsize = input.shape(0); CUDNN_CHECK(cudnnCreateTensorDescriptor(&x_desc_)); CUDNN_CHECK(cudnnCreateTensorDescriptor(&y_desc_)); - CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_)); + if (bias_term_) + CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_)); CUDNN_CHECK(cudnnCreateFilterDescriptor(&filter_desc_)); CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc_)); @@ -209,11 +210,11 @@ const std::pair<Tensor, vector<Tensor>> CudnnConvolution::Backward( Tensor dx; dx.ResetLike(src_data); Tensor db, dw; - db.ResetLike(bias_); dw.ResetLike(weight_); // LOG(ERROR) << "backward bias"; if (bias_term_) { + db.ResetLike(bias_); dx.device()->Exec([grad, db, this](Context *ctx) { Block *dyblock = grad.block(), *dbblock = db.block(); float alpha = 1.f, beta = 0.f; @@ -248,7 +249,8 @@ const std::pair<Tensor, vector<Tensor>> CudnnConvolution::Backward( this->x_desc_, dxblock->mutable_data()); }, {grad.block(), weight_.block()}, {dx.block(), workspace_.block()}); param_grad.push_back(dw); - param_grad.push_back(db); + if (bias_term_) + param_grad.push_back(db); return std::make_pair(dx, param_grad); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/dense.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc index 64e3d86..fac9130 100644 --- a/src/model/layer/dense.cc +++ b/src/model/layer/dense.cc @@ -38,11 +38,13 @@ void Dense::Setup(const Shape& in_sample, const LayerConf &conf) { vdim_ = in_sample.at(0); hdim_ = dense_conf.num_output(); transpose_ = dense_conf.transpose(); + bias_term_ = dense_conf.bias_term(); if (transpose_) // was {vdim_, hdim} by zhaojing? 
weight_.Reshape(Shape{hdim_, vdim_}); else weight_.Reshape(Shape{vdim_, hdim_}); - bias_.Reshape(Shape{hdim_}); + if (bias_term_) + bias_.Reshape(Shape{hdim_}); for (auto specs: conf.param()) param_specs_.push_back(specs); } @@ -56,7 +58,8 @@ const Tensor Dense::Forward(int flag, const Tensor &input) { output = Mult(input, weight_.T()); else output = Mult(input, weight_); - AddRow(bias_, &output); + if (bias_term_) + AddRow(bias_, &output); if (flag & kTrain) buf_.push(input); return output; @@ -70,10 +73,12 @@ const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag, Tensor src_data = buf_.top(); buf_.pop(); Tensor db, dw, dx; - db.ResetLike(bias_); dw.ResetLike(weight_); dx.ResetLike(src_data); - SumRows(grad, &db); + if (bias_term_) { + db.ResetLike(bias_); + SumRows(grad, &db); + } if (transpose_) { dx = Mult(grad, weight_); dw = Mult(grad.T(), src_data); @@ -82,7 +87,8 @@ const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag, dw = Mult(src_data.T(), grad); } param_grad.push_back(dw); - param_grad.push_back(db); + if (bias_term_) + param_grad.push_back(db); return std::make_pair(dx, param_grad); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/dense.h ---------------------------------------------------------------------- diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h index 8a149a5..8f53699 100644 --- a/src/model/layer/dense.h +++ b/src/model/layer/dense.h @@ -46,7 +46,10 @@ class Dense : public Layer { void ToDevice(std::shared_ptr<Device> device) override; const std::vector<Tensor> param_values() override { - return std::vector<Tensor>{weight_, bias_}; + if (bias_term_) + return std::vector<Tensor>{weight_, bias_}; + else + return std::vector<Tensor>{weight_}; } size_t num_output() const { return hdim_; } size_t num_input() const { return vdim_; } @@ -67,6 +70,8 @@ class Dense : public Layer { /// Used in auto-encoder, where the decoder would share its weight matrix from /// the encoder's transposed weight matrix. bool transpose_ = false; + /// use bias or not; + bool bias_term_ = true; size_t vdim_, hdim_; Tensor weight_, bias_; // Tensor data_, grad_;
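Taken together, the layer.py, dense and convolution changes above make the bias term optional end to end: with use_bias=False no bias ParamSpec is registered on the Python side, param_values() returns only the weight on the C++ side, and Backward() skips the bias gradient. A minimal sketch of the resulting Python behaviour, assuming a CUDA build (the layer names and shapes below are made up for illustration):

# With use_bias=False only the weight spec should be registered (no '*_bias' entry).
from singa.layer import Conv2D, Dense

conv = Conv2D('conv1', 64, 3, 1, pad=1, use_bias=False,
              input_sample_shape=(3, 224, 224))
print [spec.name for spec in conv.param_specs]   # expected: ['conv1_weight']

dense = Dense('fc', 10, use_bias=False, input_sample_shape=(256,))
print [spec.name for spec in dense.param_specs]  # expected: ['fc_weight']
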

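For completeness, here is a hedged sketch of exercising the new wide residual network outside the rafiki agent loop, stitched together only from pieces introduced in this commit (create_wide_resnet in model.py and the augmentation/averaging logic in serve.py). It assumes a CUDA build of SINGA and that wrn-50-2.pickle, synset_words.txt and a local test.jpg are present; the file names are placeholders.

# Offline smoke test of the wide resnet; a sketch only, not part of the patch.
import numpy as np
from singa import device, tensor, image_tool
import model

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

net = model.create_wide_resnet('wrn-50-2.pickle')
dev = device.create_cuda_gpu()
net.to_device(dev)

# Same 10-view augmentation as serve.py: resize to 256, five 224x224 crops, plus flips.
tool = image_tool.ImageTool()
imgs = tool.load('test.jpg').resize_by_list([256]).crop5((224, 224), 5).flip(2).get()
x = np.array([np.array(im.convert('RGB'), dtype=np.float32).transpose(2, 0, 1)
              for im in imgs]) / 256
x -= mean[np.newaxis, :, np.newaxis, np.newaxis]
x /= std[np.newaxis, :, np.newaxis, np.newaxis]

tx = tensor.Tensor(x.shape, dev)
tx.copy_from_numpy(x)

# Average the network outputs over the augmented views, as predict() in serve.py does.
out = tensor.to_numpy(net.predict(tx))
out = np.average(out.reshape((1, len(imgs), -1)), 1)[0]

labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
for idx in np.argsort(-out)[:5]:
    print labels[idx], out[idx]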