SINGA-300 - Add residual networks for imagenet classification

update convert.py for general resnet models and pre-activation resnet
tested all models for serving; upload pickle files to s3

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/45ec92d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/45ec92d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/45ec92d8

Branch: refs/heads/master
Commit: 45ec92d8ffc1fa1385a9307fdf07e21da939ee2f
Parents: c0317d1
Author: Wei Wang <[email protected]>
Authored: Sat Feb 11 18:39:20 2017 +0800
Committer: Wei Wang <[email protected]>
Committed: Sun Feb 12 22:01:19 2017 +0800

----------------------------------------------------------------------
 examples/imagenet/resnet/convert.py | 136 +++++++++++++-------
 examples/imagenet/resnet/model.py   | 214 +++++++++++++++++++++++++------
 examples/imagenet/resnet/serve.py   |   5 +-
 3 files changed, 271 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/45ec92d8/examples/imagenet/resnet/convert.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/convert.py b/examples/imagenet/resnet/convert.py
index c976bf5..e630281 100644
--- a/examples/imagenet/resnet/convert.py
+++ b/examples/imagenet/resnet/convert.py
@@ -1,48 +1,98 @@
+import os
 import torchfile
 import numpy as np
 import cPickle as pickle
+from argparse import ArgumentParser
 
-def conv(m, name, params):
+'''Extract the net parameters from the torch file and store them as python dict
+using cPickle'''
+
+import model
+
+verbose=False
+
+def add_param(idx, name, val, params):
+    if type(params) == dict:
+        assert name not in params, 'duplicated param %s' % name
+        params[name] = val
+    else:
+        assert params[idx].size() == val.size, 'size mismatch for %s: %s - %s' % (name, (params[idx].shape,), (val.shape,))
+        params[idx].copy_from_numpy(val)
+
+    if verbose:
+        print name, val.shape
+
+
+def conv(m, idx, params, param_names):
     outplane = m['weight'].shape[0]
-    params[name + '-conv_weight'] = np.reshape(m['weight'], (outplane, -1))
-    return params
-
-def batchnorm(m, name, params):
-    params[name + '-bn_gamma'] = m['weight']
-    params[name + '-bn_beta'] = m['bias']
-    params[name + '-bn_mean'] = m['running_mean']
-    params[name + '-bn_var'] = m['running_var']
-    return params
-
-def block(m, name, params, has_identity):
-    branch=m[0].modules[0].modules
-    params = conv(branch[0], name + '-1', params)
-    params = batchnorm(branch[1], name + '-1', params)
-    params = conv(branch[3], name + '-2', params)
-    params = batchnorm(branch[4], name + '-2', params)
-    params = conv(branch[6], name + '-3', params)
-    params = batchnorm(branch[7], name + '-3', params)
-    if not has_identity:
-        shortcut = m[0].modules[1].modules
-        params = conv(shortcut[0], name + '-shortcut', params)
-        params = batchnorm(shortcut[1], name + '-shortcut', params)
-    return params
-
-def stage(sid, m, num_blk, params):
-    for i in range(num_blk):
-        params = block(m[i].modules, 'stage%d-blk%d' % (sid, i), params, i!=0)
-    return params
-
-params = {}
-model = torchfile.load('wrn-50-2.t7').modules
-params = conv(model[0], 'input', params)
-params = batchnorm(model[1], 'input', params)
-params = stage(0, model[4].modules, 3, params)
-params = stage(1, model[5].modules, 4, params)
-params = stage(2, model[6].modules, 6, params)
-params = stage(3, model[7].modules, 3, params)
-
-params['dense_weight'] = np.transpose(model[10]['weight'])
-params['dense_bias'] = model[10]['bias']
-with open('wrn-50-2.pickle', 'wb') as fd:
-    pickle.dump(params, fd)
+    name = param_names[idx]
+    val = np.reshape(m['weight'], (outplane, -1))
+    add_param(idx, name, val, params)
+    return idx + 1
+
+
+def batchnorm(m, idx, params, param_names):
+    add_param(idx, param_names[idx], m['weight'], params)
+    add_param(idx + 1, param_names[idx + 1], m['bias'], params)
+    add_param(idx + 2, param_names[idx + 2], m['running_mean'], params)
+    add_param(idx + 3, param_names[idx + 3], m['running_var'], params)
+    return idx + 4
+
+
+def linear(m, idx, params, param_names):
+    add_param(idx, param_names[idx], np.transpose(m['weight']), params)
+    add_param(idx + 1, param_names[idx + 1], m['bias'], params)
+    return idx + 2
+
+
+def traverse(m, idx, params, param_names):
+    ''' Traverse all modules of the torch checkpoint file to extract params.
+
+    Args:
+        m, a TorchObject
+        idx, index for the current cursor of param_names
+        params, an empty dictionary (name->numpy) to dump the params via pickle;
+            or a list of tensor objects which should be in the same order as
+            param_names, called to initialize the net created in Singa directly
+            using param values from the torch checkpoint file.
+
+    Returns:
+        the updated idx
+    '''
+    module_type = m.__dict__['_typename']
+    if module_type in ['nn.Sequential', 'nn.ConcatTable']:
+        for x in m.modules:
+            idx = traverse(x, idx, params, param_names)
+    elif 'SpatialConvolution' in module_type:
+        idx = conv(m, idx, params, param_names)
+    elif 'SpatialBatchNormalization' in module_type:
+        idx = batchnorm(m, idx, params, param_names)
+    elif 'Linear' in module_type:
+        idx = linear(m, idx, params, param_names)
+    return idx
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description='Convert params from torch to python '
+        'dict. \n resnet could have depth of 18, 34, 101, 152; \n '
+        'wrn has depth 50; preact has depth 200; addbn has depth 50')
+    parser.add_argument("infile", help="torch checkpoint file")
+    parser.add_argument("model", choices=['resnet', 'wrn', 'preact', 'addbn'])
+    parser.add_argument("depth", type=int, choices=[18, 34, 50, 101, 152, 200])
+    args = parser.parse_args()
+
+    net = model.create_net(args.model, args.depth)
+    # model.init_params(net)
+    m = torchfile.load(args.infile)
+    params = {}
+    # params = net.param_values()
+    param_names = net.param_names()
+    traverse(m, 0, params, param_names)
+    miss = [name for name in param_names if name not in params]
+    if len(miss) > 0:
+        print 'The following params are missing from torch file'
+        print miss
+
+    outfile = os.path.splitext(args.infile)[0] + '.pickle'
+    with open(outfile, 'wb') as fd:
+        pickle.dump(params, fd)


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/45ec92d8/examples/imagenet/resnet/model.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/model.py b/examples/imagenet/resnet/model.py
index 7c9a3cf..34dfd9f 100644
--- a/examples/imagenet/resnet/model.py
+++ b/examples/imagenet/resnet/model.py
@@ -14,7 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-''' This model is created following https://github.com/facebook/fb.resnet.torch.git
+''' These models are created following https://github.com/facebook/fb.resnet.torch.git
+and https://github.com/szagoruyko/wide-residual-networks
 '''
 from singa.layer import Conv2D, Activation, MaxPooling2D, AvgPooling2D,\
     Split, Merge, Flatten, Dense, BatchNormalization, Softmax
@@ -26,6 +27,17 @@ ffnet.verbose=True
 conv_bias = False
 
 def conv(net, prefix, n, ksize, stride=1, pad=0, bn=True, relu=True, src=None):
+    '''Add a convolution layer and optionally a batchnorm and relu layer.
+
+    Args:
+        prefix, a string for the prefix of the layer name
+        n, num of filters for the conv layer
+        bn, if true add batchnorm
+        relu, if true add relu
+
+    Returns:
+        the last added layer
+    '''
     ret = net.add(Conv2D(
         prefix + '-conv', n, ksize, stride, pad=pad, use_bias=conv_bias), src)
     if bn:
@@ -35,40 +47,95 @@ def conv(net, prefix, n, ksize, stride=1, pad=0, bn=True, relu=True, src=None):
     return ret
 
-def shortcut(net, prefix, inplane, outplane, stride, src):
+def shortcut(net, prefix, inplane, outplane, stride, src, bn=False):
+    '''Add a conv shortcut layer if inplane != outplane; otherwise return the
+    source layer directly.
+
+    Args:
+        prefix, a string for the prefix of the layer name
+        bn, if true add a batchnorm layer after the conv layer
+
+    Returns:
+        the last added layer or the source layer.
+    '''
     if inplane == outplane:
         return src
-    return conv(net, prefix + '-shortcut', outplane, 1, stride, 0, True, False, src)
+    return conv(net, prefix + '-shortcut', outplane, 1, stride, 0, bn, False, src)
+
+
+def bottleneck(name, net, inplane, midplane, outplane, stride=1, preact=False, add_bn=False):
+    '''Add three conv layers, with a>=b<=c filters.
+
+    The default structure is
+        input
+            -split - conv1-bn1-relu1-conv2-bn2-relu2-conv3-bn3
+                   - conv-bn or dummy
+            -add
+            -relu
 
-def bottleneck(name, net, inplane, midplane, outplane, stride=1, preact=False):
+    Args:
+        inplane, num of feature maps of the input
+        midplane, num of feature maps of the middle layer
+        outplane, num of feature maps of the output
+        preact, if true, move the bn3 and relu before conv1, i.e., pre-activation as in the identity mapping paper
+        add_bn, if true, move the last bn after the addition layer (for resnet-50)
+    '''
+    assert not (preact and add_bn), 'preact and batchnorm after addition cannot be true at the same time'
     split = net.add(Split(name + '-split', 2))
-    conv(net, name + '-1', midplane, 1, 1, 0, True, True, src=split)
-    conv(net, name + '-2', midplane, 3, stride, 1, True, True)
-    br0 = conv(net, name + '-3', outplane, 1, 1, 0, True, False)
-    br1 = shortcut(net, name, inplane, outplane, stride, split)
-    net.add(Merge(name + '-add'), [br0, br1])
-    return net.add(Activation(name + '-relu'))
-
-def basicblock(name, net, inplane, midplane, outplane, stride=1, preact=False):
-    assert midplane==outplane, 'midplan and outplane should be the same'
+    if preact:
+        net.add(BatchNormalization(name + '-preact-bn'))
+        net.add(Activation(name + '-preact-relu'))
+    conv(net, name + '-0', midplane, 1, 1, 0, True, True)
+    conv(net, name + '-1', midplane, 3, stride, 1, True, True)
+    br0 = conv(net, name + '-2', outplane, 1, 1, 0, not (preact or add_bn), False)
+    br1 = shortcut(net, name, inplane, outplane, stride, split, not add_bn)
+    ret = net.add(Merge(name + '-add'), [br0, br1])
+    if add_bn:
+        ret = net.add(BatchNormalization(name + '-add-bn'))
+    if not preact:
+        ret = net.add(Activation(name + '-add-relu'))
+    return ret
+
+
+def basicblock(name, net, inplane, midplane, outplane, stride=1, preact=False, add_bn=False):
+    '''Add two conv layers, with a<=b filters.
+
+    The default structure is
+        input
+            -split - conv1-bn1-relu1-conv2-bn2
+                   - conv or dummy
+            -add
+            -relu
+
+    Args:
+        inplane, num of feature maps of the input
+        midplane, num of feature maps of the middle layer
+        outplane, num of feature maps of the output
+        preact, if true, move the bn2 and relu before conv1, i.e., pre-activation as in the identity mapping paper
+        add_bn, if true, move the last bn after the addition layer (for resnet-50)
+    '''
+    assert not (preact and add_bn), 'preact and batchnorm after addition cannot be true at the same time'
     split = net.add(Split(name + '-split', 2))
     if preact:
-        net.add(BatchNormalization(name + '-preact-bn'), split)
+        net.add(BatchNormalization(name + '-preact-bn'))
         net.add(Activation(name + '-preact-relu'))
-    conv(net, name + '-1', outplane, 3, stride, 1, True, True, split)
-    br0 = conv(net, name + '-2', outplane, 3, 1, 1, True, False)
-    br1 = shortcut(net, name, inplane, outplane, stride, split)
-    net.add(Merge(name + '-add'), [br0, br1])
-    return net.add(Activation(name + '-add-relu'))
+    conv(net, name + '-0', midplane, 3, stride, 1, True, True)
+    br0 = conv(net, name + '-1', outplane, 3, 1, 1, not preact, False)
+    br1 = shortcut(net, name, inplane, outplane, stride, split, False)
+    ret = net.add(Merge(name + '-add'), [br0, br1])
+    if add_bn:
+        ret = net.add(BatchNormalization(name + '-add-bn'))
+    if not preact:
+        ret = net.add(Activation(name + '-add-relu'))
+    return ret
 
-def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block):
-    block('stage%d-blk%d' % (sid, 0), net, inplane, midplane, outplane, stride)
+def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block, preact=False, add_bn=False):
+    block('stage%d-blk%d' % (sid, 0), net, inplane, midplane, outplane, stride, preact, add_bn)
     for i in range(1, num_blk):
-        block('stage%d-blk%d' % (sid, i), net, outplane, midplane, outplane)
+        block('stage%d-blk%d' % (sid, i), net, outplane, midplane, outplane, 1, preact, add_bn)
 
-def init_params(net, weight_path):
+def init_params(net, weight_path=None):
     if weight_path == None:
         for pname, pval in zip(net.param_names(), net.param_values()):
             print pname, pval.shape
@@ -89,32 +156,90 @@ def init_params(net, weight_path):
     else:
         net.load(weight_path, use_pickle = 'pickle' in weight_path)
 
-def create_resnet(weight_path=None, depth=50):
-    cfg = {
-        50: ([3, 4, 6, 3], bottleneck),
-        101: ([3, 4, 23, 3], bottleneck),
-        152: ([3, 8, 36, 3], bottleneck),
-    }
+
+cfg = { 18: [2, 2, 2, 2],  # basicblock
+        34: [3, 4, 6, 3],  # basicblock
+        50: [3, 4, 6, 3],  # bottleneck
+        101: [3, 4, 23, 3],  # bottleneck
+        152: [3, 8, 36, 3],  # bottleneck
+        200: [3, 24, 36, 3]}  # bottleneck
+
+
+def create_addbn_resnet(depth=50):
+    '''Original resnet with the last batchnorm of each block moved to after the addition layer'''
     net = ffnet.FeedForwardNet()
-    net.add(Conv2D('input-conv', 64, 7, 2, pad=3, input_sample_shape=(3, 224, 224)))
+    net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
     net.add(BatchNormalization('input-bn'))
     net.add(Activation('input_relu'))
     net.add(MaxPooling2D('input_pool', 3, 2, pad=1))
+    conf = cfg[depth]
+    if depth > 34:
+        stage(0, net, conf[0], 64, 64, 256, 1, bottleneck, add_bn=True)
+        stage(1, net, conf[1], 256, 128, 512, 2, bottleneck, add_bn=True)
+        stage(2, net, conf[2], 512, 256, 1024, 2, bottleneck, add_bn=True)
+        stage(3, net, conf[3], 1024, 512, 2048, 2, bottleneck, add_bn=True)
+    else:
+        stage(0, net, conf[0], 64, 64, 64, 1, basicblock, add_bn=True)
+        stage(1, net, conf[1], 64, 128, 128, 2, basicblock, add_bn=True)
+        stage(2, net, conf[2], 128, 256, 256, 2, basicblock, add_bn=True)
+        stage(3, net, conf[3], 256, 512, 512, 2, basicblock, add_bn=True)
+    net.add(AvgPooling2D('avg', 7, 1, pad=0))
+    net.add(Flatten('flat'))
+    net.add(Dense('dense', 1000))
+    return net
+
+
+def create_resnet(depth=18):
+    '''Original resnet, where there is a relu after the addition layer'''
+    net = ffnet.FeedForwardNet()
+    net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
+    net.add(BatchNormalization('input-bn'))
+    net.add(Activation('input_relu'))
+    net.add(MaxPooling2D('input_pool', 3, 2, pad=1))
     conf = cfg[depth]
-    stage(0, net, conf[0][0], 64, 64, 256, 1, conf[1])
-    stage(1, net, conf[0][1], 256, 128, 512, 2, conf[1])
-    stage(2, net, conf[0][2], 512, 256, 1024, 2, conf[1])
-    stage(3, net, conf[0][3], 1024, 512, 2048, 2, conf[1])
-    net.add(AvgPooling2D('avg', 7, 1))
+    if depth > 34:
+        stage(0, net, conf[0], 64, 64, 256, 1, bottleneck)
+        stage(1, net, conf[1], 256, 128, 512, 2, bottleneck)
+        stage(2, net, conf[2], 512, 256, 1024, 2, bottleneck)
+        stage(3, net, conf[3], 1024, 512, 2048, 2, bottleneck)
+    else:
+        stage(0, net, conf[0], 64, 64, 64, 1, basicblock)
+        stage(1, net, conf[1], 64, 128, 128, 2, basicblock)
+        stage(2, net, conf[2], 128, 256, 256, 2, basicblock)
+        stage(3, net, conf[3], 256, 512, 512, 2, basicblock)
+    net.add(AvgPooling2D('avg', 7, 1, pad=0))
     net.add(Flatten('flat'))
     net.add(Dense('dense', 1000))
+    return net
 
-    init_params(net, weight_path)
+
+def create_preact_resnet(depth=200):
+    '''Resnet with the batchnorm and relu moved to before the conv layer for each block'''
+    net = ffnet.FeedForwardNet()
+    net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
+    net.add(BatchNormalization('input-bn'))
+    net.add(Activation('input_relu'))
+    net.add(MaxPooling2D('input_pool', 3, 2, pad=1))
+    conf = cfg[depth]
+    if depth > 34:
+        stage(0, net, conf[0], 64, 64, 256, 1, bottleneck, preact=True)
+        stage(1, net, conf[1], 256, 128, 512, 2, bottleneck, preact=True)
+        stage(2, net, conf[2], 512, 256, 1024, 2, bottleneck, preact=True)
+        stage(3, net, conf[3], 1024, 512, 2048, 2, bottleneck, preact=True)
+    else:
+        stage(0, net, conf[0], 64, 64, 64, 1, basicblock, preact=True)
+        stage(1, net, conf[1], 64, 128, 128, 2, basicblock, preact=True)
+        stage(2, net, conf[2], 128, 256, 256, 2, basicblock, preact=True)
+        stage(3, net, conf[3], 256, 512, 512, 2, basicblock, preact=True)
+    net.add(BatchNormalization('final-bn'))
+    net.add(Activation('final-relu'))
+    net.add(AvgPooling2D('avg', 7, 1, pad=0))
+    net.add(Flatten('flat'))
+    net.add(Dense('dense', 1000))
     return net
 
-def create_wide_resnet(weight_path=None):
+
+def create_wide_resnet(depth=50):
+    '''Similar to the original resnet except that a<=b<=c for the bottleneck block'''
     net = ffnet.FeedForwardNet()
     net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
     net.add(BatchNormalization('input-bn'))
@@ -129,10 +254,19 @@
     net.add(AvgPooling2D('avg_pool', 7, 1, pad=0))
     net.add(Flatten('flag'))
     net.add(Dense('dense', 1000))
-
-    init_params(net, weight_path)
     return net
 
+def create_net(name, depth):
+    if name == 'resnet':
+        return create_resnet(depth)
+    elif name == 'wrn':
+        return create_wide_resnet(depth)
+    elif name == 'preact':
+        return create_preact_resnet(depth)
+    elif name == 'addbn':
+        return create_addbn_resnet(depth)
+
+
 if __name__ == '__main__':
-    create_net('wrn-50-2.pickle')
+    create_net('wrn', 50)


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/45ec92d8/examples/imagenet/resnet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py
index d9609ce..a6163f7 100644
--- a/examples/imagenet/resnet/serve.py
+++ b/examples/imagenet/resnet/serve.py
@@ -105,6 +105,8 @@ def main():
     parser.add_argument("-c", "--use_cpu", action="store_true",
                         help="If set, load models onto CPU devices")
     parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
+    parser.add_argument("--model", choices=['resnet', 'wrn', 'preact', 'addbn'], default='wrn')
+    parser.add_argument("--depth", type=int, choices=[18, 34, 50, 101, 152, 200], default=50)
 
     # Process arguments
     args = parser.parse_args()
@@ -113,9 +115,10 @@ def main():
 
     # start to train
     agent = Agent(port)
-    net = model.create_wide_resnet(args.parameter_file)
+    net = model.create_net(args.model, args.depth)
     dev = device.create_cuda_gpu()
     net.to_device(dev)
+    model.init_params(net, args.parameter_file)
     print 'Finish loading models'
     labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
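
The converted pieces fit together as follows; a minimal sketch, assuming a torch checkpoint named `wrn-50-2.t7` in the working directory (the file name is illustrative; the output pickle name follows convert.py's `os.path.splitext` rule):

    # Step 1: convert the torch checkpoint to a pickled dict of numpy arrays.
    #   python convert.py wrn-50-2.t7 wrn 50      # writes wrn-50-2.pickle
    # Step 2: rebuild the matching SINGA graph and load the converted params.
    import model

    net = model.create_net('wrn', 50)           # wide resnet, as served by serve.py
    model.init_params(net, 'wrn-50-2.pickle')   # calls net.load(..., use_pickle=True)

serve.py performs the same two calls before moving the net onto a GPU device, e.g. `python serve.py --model wrn --depth 50 --parameter_file wrn-50-2.pickle`.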

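As the traverse docstring notes, the same walk can also initialize a SINGA net directly from the torch checkpoint, skipping the pickle step; a sketch following the commented-out `net.param_values()` lines in convert.py's main block (the checkpoint name is illustrative):

    import torchfile

    import convert
    import model

    net = model.create_net('resnet', 18)
    m = torchfile.load('resnet-18.t7')  # illustrative checkpoint name
    # Passing the net's own tensors instead of an empty dict makes add_param
    # take its else-branch, copying each torch array into the SINGA tensor
    # at the same position in param_names.
    convert.traverse(m, 0, net.param_values(), net.param_names())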