SINGA-81 Add Python Helper Add comments for some functions in model.py. Remove the rnnlm-related code, which could be added later when it can be run successfully using python. Move datasets/ into examples as they are used mainly by the examples. Update .gitignore to ignore the pb2 folder in tool/python/.
TODO add comments for other methods in files under singa/ folder. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8914750e Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8914750e Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8914750e Branch: refs/heads/master Commit: 8914750e8c6d6fd0d9d0d8aed53fd775a1367b88 Parents: 3c12730 Author: Wei Wang <[email protected]> Authored: Tue Dec 29 11:36:28 2015 +0800 Committer: chonho <[email protected]> Committed: Fri Jan 1 15:59:15 2016 +0800 ---------------------------------------------------------------------- tool/python/examples/cifar10_cnn.py | 4 +- tool/python/examples/cifar10_cnn_cudnn.py | 4 +- .../python/examples/cifar10_cnn_cudnn_hybrid.py | 34 ----- tool/python/examples/cifar10_cnn_parameter.py | 4 +- tool/python/examples/datasets/__init__.py | 0 tool/python/examples/datasets/cifar10.py | 34 +++++ tool/python/examples/datasets/mnist.py | 32 +++++ tool/python/examples/rnnlm_usermodel.py | 22 ---- tool/python/singa.py | 26 ++-- tool/python/singa/datasets/__init__.py | 0 tool/python/singa/datasets/cifar10.py | 34 ----- tool/python/singa/datasets/mnist.py | 32 ----- tool/python/singa/datasets/rnnlm.py | 20 --- tool/python/singa/model.py | 132 +++++++++++-------- 14 files changed, 164 insertions(+), 214 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/cifar10_cnn.py b/tool/python/examples/cifar10_cnn.py index 9ef552b..859a9a4 100755 --- a/tool/python/examples/cifar10_cnn.py +++ b/tool/python/examples/cifar10_cnn.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import sys, os -sys.path.append(os.path.join(os.path.dirname(__file__),'..')) 
+sys.path.append(os.path.join(os.path.dirname(__file__),'..')) from singa.model import * from singa.datasets import cifar10 @@ -24,7 +24,7 @@ m.add(AvgPooling2D(pool_size=(3,3), stride=2)) m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax')) -sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) +sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) topo = Cluster(workspace) m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo) m.fit(X_train, nb_epoch=1000, with_test=True) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn_cudnn.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/cifar10_cnn_cudnn.py b/tool/python/examples/cifar10_cnn_cudnn.py index e3c5c49..d4f4b7c 100755 --- a/tool/python/examples/cifar10_cnn_cudnn.py +++ b/tool/python/examples/cifar10_cnn_cudnn.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import sys, os -sys.path.append(os.path.join(os.path.dirname(__file__),'..')) +sys.path.append(os.path.join(os.path.dirname(__file__),'..')) from singa.model import * from singa.datasets import cifar10 @@ -24,7 +24,7 @@ m.add(AvgPooling2D(pool_size=(3,3), stride=2)) m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax')) -sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) +sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) topo = Cluster(workspace) m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn_cudnn_hybrid.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/cifar10_cnn_cudnn_hybrid.py b/tool/python/examples/cifar10_cnn_cudnn_hybrid.py deleted file mode 100755 index 
f5e4c27..0000000 --- a/tool/python/examples/cifar10_cnn_cudnn_hybrid.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -import sys, os -sys.path.append(os.path.join(os.path.dirname(__file__),'..')) -from singa.model import * -from singa.datasets import cifar10 - -X_train, X_test, workspace = cifar10.load_data() - -m = Sequential('cifar10-cnn', sys.argv) - -m.add(Convolution2D(32, 5, 1, 2, w_std=0.0001, b_lr=2)) -m.add(MaxPooling2D(pool_size=(3,3), stride=2)) -m.add(Activation('relu')) -m.add(LRN2D(3, alpha=0.00005, beta=0.75)) - -m.add(Convolution2D(32, 5, 1, 2, b_lr=2)) -m.add(Activation('relu')) -m.add(AvgPooling2D(pool_size=(3,3), stride=2)) -m.add(LRN2D(3, alpha=0.00005, beta=0.75)) - -m.add(Convolution2D(64, 5, 1, 2)) -m.add(Activation('relu')) -m.add(AvgPooling2D(pool_size=(3,3), stride=2)) - -m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax')) - -sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) -topo = Cluster(workspace, nworkers_per_group=2, nworkers_per_procs=2) -m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo) - -gpu_id = [0,1] -m.fit(X_train, nb_epoch=10000, with_test=True, device=gpu_id) -result = m.evaluate(X_test, test_steps=0, test_freq=200) - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn_parameter.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/cifar10_cnn_parameter.py b/tool/python/examples/cifar10_cnn_parameter.py index dd03f5c..4144fa5 100755 --- a/tool/python/examples/cifar10_cnn_parameter.py +++ b/tool/python/examples/cifar10_cnn_parameter.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import sys, os -sys.path.append(os.path.join(os.path.dirname(__file__),'..')) +sys.path.append(os.path.join(os.path.dirname(__file__),'..')) from singa.model import * from singa.datasets import cifar10 @@ -27,7 +27,7 @@ m.add(AvgPooling2D(pool_size=(3,3), stride=2)) 
m.add(Dense(10, w_param=parw, w_wd=250, b_param=parb, b_lr=2, b_wd=0, activation='softmax')) -sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) +sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001)) topo = Cluster(workspace) m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo) m.fit(X_train, nb_epoch=100, with_test=True) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/datasets/__init__.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/datasets/__init__.py b/tool/python/examples/datasets/__init__.py new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/datasets/cifar10.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/datasets/cifar10.py b/tool/python/examples/datasets/cifar10.py new file mode 100644 index 0000000..65bcd60 --- /dev/null +++ b/tool/python/examples/datasets/cifar10.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +from singa.model import * + +def load_data( + workspace = None, + backend = 'kvfile', + batchsize = 64, + random = 5000, + shape = (3, 32, 32), + std = 127.5, + mean = 127.5 + ): + + # using cifar10 dataset + data_dir = 'examples/cifar10' + path_train = data_dir + '/train_data.bin' + path_test = data_dir + '/test_data.bin' + path_mean = data_dir + '/image_mean.bin' + if workspace == None: workspace = data_dir + + store = Store(path=path_train, mean_file=path_mean, backend=backend, + random_skip=random, batchsize=batchsize, + shape=shape) + + data_train = Data(load='recordinput', phase='train', conf=store) + + store = Store(path=path_test, mean_file=path_mean, backend=backend, + batchsize=batchsize, + shape=shape) + + data_test = Data(load='recordinput', phase='test', conf=store) + + return data_train, data_test, 
workspace + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/datasets/mnist.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/datasets/mnist.py b/tool/python/examples/datasets/mnist.py new file mode 100644 index 0000000..c8695ec --- /dev/null +++ b/tool/python/examples/datasets/mnist.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +from singa.model import * + +def load_data( + workspace = None, + backend = 'kvfile', + nb_rbm = 0, # the number of layers for RBM and Autoencoder + checkpoint_steps = 0, + **pvalues + ): + + # using mnist dataset + data_dir = 'examples/mnist' + path_train = data_dir + '/train_data.bin' + path_test = data_dir + '/test_data.bin' + if workspace == None: workspace = data_dir + + # checkpoint path to load + checkpoint_list = None + if checkpoint_steps > 0: + workerid = 0 + checkpoint_list = [] + for i in range(nb_rbm-1, 0, -1): + checkpoint_list.append('examples/rbm/rbm{0}/checkpoint/step{1}-worker{2}'.format(str(i),checkpoint_steps,workerid)) + + store = Store(path=path_train, backend=backend, **pvalues) + data_train = Data(load='recordinput', phase='train', conf=store, checkpoint=checkpoint_list) + + store = Store(path=path_test, backend=backend, **pvalues) + data_test = Data(load='recordinput', phase='test', conf=store) + + return data_train, data_test, workspace http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/rnnlm_usermodel.py ---------------------------------------------------------------------- diff --git a/tool/python/examples/rnnlm_usermodel.py b/tool/python/examples/rnnlm_usermodel.py deleted file mode 100755 index 1b49321..0000000 --- a/tool/python/examples/rnnlm_usermodel.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -import sys, os -sys.path.append(os.path.join(os.path.dirname(__file__),'..')) -from singa.model import * -from singa.datasets import rnnlm - -vocab_size = 3720 - 
-X_train, X_valid, workspace = rnnlm.load_data() - -m = Sequential('rnnlm', sys.argv) - -parw = Parameter(init='uniform', range=0.3) -m.add(Embedding(in_dim=vocab_size, out_dim=15, w_param=parw)) -m.add(RNNLM(1, w_param=parw)) - -sgd = SGD(lr_type='fixed', step=(0,48810,56945,65080,73215), step_lr=(0.1,0.05,0.025,0.0125,0.00625)) -topo = Cluster(workspace) -m.compile(loss='user_loss_rnnlm', in_dim=vocab_size, nclass=100, optimizer=sgd, cluster=topo) - -m.fit(X_train, validate=X_valid, validate_steps=683, nb_epoch=81350, execpath='examples/rnnlm/rnnlm.bin') -#result = m.evaluate(X_valid, validate_steps=683, validate_freq=8135, execpath='examples/rnnlm/rnnlm.bin') http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa.py ---------------------------------------------------------------------- diff --git a/tool/python/singa.py b/tool/python/singa.py index 6d7fbdf..986a6b8 100755 --- a/tool/python/singa.py +++ b/tool/python/singa.py @@ -31,14 +31,18 @@ import singa.driver as driver from google.protobuf.text_format import Merge if __name__ == '__main__': - i = sys.argv.index("-conf") - s = open(sys.argv[i+1], 'r').read() - s = str(s) - j = job_pb2.JobProto() - Merge(s,j) - b = j.SerializeToString() - d = driver.Driver() - d.InitLog(sys.argv[0]) - d.Init(sys.argv) -# d.Train(False,b) - d.Test(b) + """Invoke the training program using this python script. 
+ ./bin/singa-run.sh -exec tool/python/singa.py -conf examples/cifar10/job.conf + """" + + i = sys.argv.index("-conf") + s = open(sys.argv[i+1], 'r').read() + s = str(s) + j = job_pb2.JobProto() + Merge(s,j) + b = j.SerializeToString() + d = driver.Driver() + d.InitLog(sys.argv[0]) + d.Init(sys.argv) + d.Train(False,b) + #d.Test(b) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/__init__.py ---------------------------------------------------------------------- diff --git a/tool/python/singa/datasets/__init__.py b/tool/python/singa/datasets/__init__.py deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/cifar10.py ---------------------------------------------------------------------- diff --git a/tool/python/singa/datasets/cifar10.py b/tool/python/singa/datasets/cifar10.py deleted file mode 100644 index 65bcd60..0000000 --- a/tool/python/singa/datasets/cifar10.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -from singa.model import * - -def load_data( - workspace = None, - backend = 'kvfile', - batchsize = 64, - random = 5000, - shape = (3, 32, 32), - std = 127.5, - mean = 127.5 - ): - - # using cifar10 dataset - data_dir = 'examples/cifar10' - path_train = data_dir + '/train_data.bin' - path_test = data_dir + '/test_data.bin' - path_mean = data_dir + '/image_mean.bin' - if workspace == None: workspace = data_dir - - store = Store(path=path_train, mean_file=path_mean, backend=backend, - random_skip=random, batchsize=batchsize, - shape=shape) - - data_train = Data(load='recordinput', phase='train', conf=store) - - store = Store(path=path_test, mean_file=path_mean, backend=backend, - batchsize=batchsize, - shape=shape) - - data_test = Data(load='recordinput', phase='test', conf=store) - - return data_train, data_test, workspace - 
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/mnist.py ---------------------------------------------------------------------- diff --git a/tool/python/singa/datasets/mnist.py b/tool/python/singa/datasets/mnist.py deleted file mode 100644 index c8695ec..0000000 --- a/tool/python/singa/datasets/mnist.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python -from singa.model import * - -def load_data( - workspace = None, - backend = 'kvfile', - nb_rbm = 0, # the number of layers for RBM and Autoencoder - checkpoint_steps = 0, - **pvalues - ): - - # using mnist dataset - data_dir = 'examples/mnist' - path_train = data_dir + '/train_data.bin' - path_test = data_dir + '/test_data.bin' - if workspace == None: workspace = data_dir - - # checkpoint path to load - checkpoint_list = None - if checkpoint_steps > 0: - workerid = 0 - checkpoint_list = [] - for i in range(nb_rbm-1, 0, -1): - checkpoint_list.append('examples/rbm/rbm{0}/checkpoint/step{1}-worker{2}'.format(str(i),checkpoint_steps,workerid)) - - store = Store(path=path_train, backend=backend, **pvalues) - data_train = Data(load='recordinput', phase='train', conf=store, checkpoint=checkpoint_list) - - store = Store(path=path_test, backend=backend, **pvalues) - data_test = Data(load='recordinput', phase='test', conf=store) - - return data_train, data_test, workspace http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/rnnlm.py ---------------------------------------------------------------------- diff --git a/tool/python/singa/datasets/rnnlm.py b/tool/python/singa/datasets/rnnlm.py deleted file mode 100644 index ef8142a..0000000 --- a/tool/python/singa/datasets/rnnlm.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python -from singa.model import * - -def load_data( - workspace = 'examples/rnnlm', - backend = 'kvfile', - max_window = 10 - ): - - path_train = workspace + '/train_data.bin' - path_valid = workspace + 
'/valid_data.bin' - path_test = workspace + '/test_data.bin' - - - data_train = Data(load='kData', phase='train', path=path_train, backend=backend, max_window=max_window) - - data_valid = Data(load='kData', phase='val', path=path_valid, max_window=max_window) - - return data_train, data_valid, workspace - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/model.py ---------------------------------------------------------------------- diff --git a/tool/python/singa/model.py b/tool/python/singa/model.py index d68d143..29db70e 100644 --- a/tool/python/singa/model.py +++ b/tool/python/singa/model.py @@ -1,8 +1,8 @@ #!/usr/bin/env python import sys, re, subprocess from layer import * -from utils.utility import * -from utils.message import * +from utils.utility import * +from utils.message import * from google.protobuf import text_format class Model(object): @@ -14,14 +14,14 @@ class Model(object): argv // pass sys.argv to source label = (bool) // exist label layer (depreciated) ''' - self.jobconf = Message('Job', name=name).proto + self.jobconf = Message('Job', name=name).proto self.layers = [] self.label = label self.argv = argv self.result = None self.last_checkpoint_path = None self.cudnn = False - + def exist_datalayer(self, phase): for ly in self.layers: if enumPhase(phase) in ly.layer.include: @@ -38,7 +38,7 @@ class Model(object): topk = (int) // the number of results considered to compute accuracy ''' assert optimizer != None, 'optimizer (Updater component) should be set' - assert cluster != None, 'cluster (Cluster component) should be set' + assert cluster != None, 'cluster (Cluster component) should be set' setval(self.jobconf, updater=optimizer.proto) setval(self.jobconf, cluster=cluster.proto) @@ -56,7 +56,7 @@ class Model(object): # revise the last layer if loss == 'categorical_crossentropy': setval(ly, type=enumLayerType('softmaxloss')) - setval(ly.softmaxloss_conf, topk=topk) + setval(ly.softmaxloss_conf, topk=topk) 
elif loss == 'mean_squared_error': setval(ly, type=enumLayerType('euclideanloss')) else: @@ -72,7 +72,7 @@ class Model(object): ''' construct neuralnet proto ''' - net = NetProto() + net = NetProto() slyname = self.layers[0].layer.name for i in range(len(self.layers)): ly = net.layer.add() @@ -95,7 +95,7 @@ class Model(object): # deal with label layer (depreciated) if self.label == True: - label_layer = Layer(name='label', type=kLabel) + label_layer = Layer(name='label', type=kLabel) ly = net.layer.add() ly.CopyFrom(label_layer.layer) getattr(ly, 'srclayers').append(self.layers[0].layer.name) @@ -108,7 +108,7 @@ class Model(object): # use of cudnn if self.cudnn == True: - self.setCudnnLayerType(net) + self.setCudnnLayerType(net) setval(self.jobconf, neuralnet=net) @@ -127,7 +127,7 @@ class Model(object): batch_size = (int) // batch size for training data train_steps = (int) // the number of steps for training, i.e., epoch disp_freq = (int) // frequency to display training info - disp_after = (int) // display after this number + disp_after = (int) // display after this number validate_data = (Data) // validation data, specified in load_data() validate_freq = (int) // frequency of validation validate_steps = (int) // total number of steps for validation @@ -143,7 +143,7 @@ class Model(object): setval(data.layer.store_conf, batchsize=fields['batch_size']) # insert layer for training - if self.exist_datalayer('train') == False: + if self.exist_datalayer('train') == False: self.layers.insert(0, data) setval(self.jobconf, train_steps=nb_epoch) setval(self.jobconf, disp_freq=nb_epoch/10) @@ -163,8 +163,8 @@ class Model(object): # save model parameter (i.e., checkpoint_path) setval(self.jobconf, checkpoint_freq=nb_epoch) self.last_checkpoint_path = '{0}/step{1}-worker0'.format( - self.jobconf.cluster.workspace, nb_epoch) - + self.jobconf.cluster.workspace, nb_epoch) + # set Train_one_batch component, using backprogapation at default setval(self.jobconf, 
train_one_batch=Algorithm(type=enumAlgType(alg)).proto) @@ -174,7 +174,7 @@ class Model(object): self.cudnn = True # start to run singa for training - if with_test == False: + if with_test == False: self.build() # construct Nneuralnet Component #self.display() return SingaRun(jobproto=self.jobconf, argv=self.argv, execpath=execpath) @@ -191,13 +191,13 @@ class Model(object): optional alg = (string) // algorithm type, (backpropagation at default) checkpoint_path = (list) // checkpoint path is necessary only for testing - execpaths = (string) // path to user's own executable + execpaths = (string) // path to user's own executable device = (int/list) // a list of gpu ids **fields (KEY=VALUE) batch_size = (int) // batch size for testing data test_freq = (int) // frequency of testing - test_steps = (int) // total number of steps for testing - test_after = (int) // start testing after this number of steps + test_steps = (int) // total number of steps for testing + test_after = (int) // start testing after this number of steps ''' assert data != None, 'Testing data should be set' is_testonly = False @@ -206,11 +206,11 @@ class Model(object): setval(data.layer.store_conf, batchsize=fields['batch_size']) # insert layer for testing - if self.exist_datalayer('test') == False: + if self.exist_datalayer('test') == False: self.layers.insert(0, data) # loading checkpoint if singa runs only for testing - if self.exist_datalayer('train') == False: + if self.exist_datalayer('train') == False: is_testonly = True if checkpoint_path == None: print 'checkpoint_path has not been specified' @@ -220,7 +220,7 @@ class Model(object): steps = fields['test_steps'] if 'test_steps' in fields else 10 setval(self.jobconf, test_steps=steps) setval(self.jobconf, **fields) - + # set Train_one_batch component, using backprogapation at default setval(self.jobconf, train_one_batch=Algorithm(type=enumAlgType(alg)).proto) @@ -231,16 +231,16 @@ class Model(object): self.build() # construct Nneuralnet 
Component - #--- generate job.conf file for debug purpose + #--- generate job.conf file for debug purpose #filename = 'job.conf' #with open(filename, 'w') as f: # f.write(text_format.MessageToString(self.jobconf.cluster)) #self.display() - #--- run singa --- + #--- run singa --- return SingaRun(jobproto=self.jobconf, argv=self.argv, execpath=execpath, testmode=is_testonly) #return SingaRun_script(filename=filename, execpath=execpath) - + def display(self): ''' print out job proto @@ -260,13 +260,13 @@ class Model(object): elif ly_type == kSoftmaxLoss: cudnn_ly_type = kCudnnSoftmaxLoss elif ly_type == kSTanh: cudnn_ly_type = kCudnnActivation - net.layer[i].activation_conf.type = STANH + net.layer[i].activation_conf.type = STANH elif ly_type == kSigmoid: cudnn_ly_type = kCudnnActivation - net.layer[i].activation_conf.type = SIGMOID + net.layer[i].activation_conf.type = SIGMOID elif ly_type == kReLU: cudnn_ly_type = kCudnnActivation - net.layer[i].activation_conf.type = RELU + net.layer[i].activation_conf.type = RELU net.layer[i].type = cudnn_ly_type @@ -277,7 +277,7 @@ class Energy(Model): def add(self, layer): if hasattr(layer, 'layer_type'): if layer.layer_type == kRBMVis: - dim = 0 + dim = 0 for i in range(1, len(layer.out_dim)): parw = Parameter(name='w', init='none', level=i) parb = Parameter(name='b', init='none', level=i) @@ -293,7 +293,7 @@ class Sequential(Model): def add(self, layer): if hasattr(layer, 'layer_type'): if layer.layer_type == 'AutoEncoder': - dim = 0 + dim = 0 if layer.param_share == True: # Encoding for i in range(1, len(layer.hid_dim)+1): @@ -331,9 +331,9 @@ class Store(object): ''' **kwargs path = (string) // path to dataset - backend = (string) // + backend = (string) // batch_size = (int) // batch size of dataset - shape = (int) // + shape = (int) // ''' self.proto = Message('Store', **kwargs).proto @@ -357,23 +357,23 @@ class Updater(object): lr_type = (string) // type of the learning rate (Fixed at default) ''' upd = Message('Updater', 
type=upd_type, **fields).proto - setval(upd.learning_rate, base_lr=lr) + setval(upd.learning_rate, base_lr=lr) if decay > 0: - setval(upd, weight_decay=decay) + setval(upd, weight_decay=decay) if momentum > 0: - setval(upd, momentum=momentum) + setval(upd, momentum=momentum) - if lr_type == None: - setval(upd.learning_rate, type=kFixed) + if lr_type == None or lr_type == "fixed": + setval(upd.learning_rate, type=kFixed) elif lr_type == 'step': cp = Message('Step', change_freq=60, gamma=0.997) - setval(upd.learning_rate, type=kStep, step_conf=cp.proto) - elif lr_type == 'fixedstep': + setval(upd.learning_rate, type=kStep, step_conf=cp.proto) + elif lr_type == 'manual': cp = Message('FixedStep', step=step, step_lr=step_lr) - setval(upd.learning_rate, type=kFixedStep, fixedstep_conf=cp.proto) + setval(upd.learning_rate, type=kFixedStep, fixedstep_conf=cp.proto) elif lr_type == 'linear': cp = Message('Linear', change_freq=10, final_lr=0.1) - setval(upd.learning_rate, type=kLinear, linear_conf=cp.proto) + setval(upd.learning_rate, type=kLinear, linear_conf=cp.proto) self.proto = upd @@ -422,6 +422,15 @@ class AdaGrad(Updater): class Cluster(object): + """ Specify the cluster topology, e.g., number of workers/servers. + + Currently we need to create this object in the .py file and also provide a + cluster configuration file to the command line. TODO(wangwei) update SINGA + code to eliminate the requirement of the cluster configuration file for + training on a single node or the cluster object in the pyfile for training + in a cluster. 
+ """ + def __init__(self, workspace=None, nworker_groups=1, nserver_groups=1, nworkers_per_group=1, nservers_per_group=1, @@ -443,65 +452,78 @@ class Cluster(object): assert workspace != None, 'need to set workspace' self.proto = Message('Cluster', workspace=workspace).proto # optional - self.proto.nworker_groups = nworker_groups - self.proto.nserver_groups = nserver_groups - self.proto.nworkers_per_group = nworkers_per_group - self.proto.nservers_per_group = nservers_per_group - self.proto.nworkers_per_procs = nworkers_per_procs - self.proto.nservers_per_procs = nservers_per_procs + self.proto.nworker_groups = nworker_groups + self.proto.nserver_groups = nserver_groups + self.proto.nworkers_per_group = nworkers_per_group + self.proto.nservers_per_group = nservers_per_group + self.proto.nworkers_per_procs = nworkers_per_procs + self.proto.nservers_per_procs = nservers_per_procs # other fields setval(self.proto, **fields) def StoreResults(lines): + """ Parsing metrics from each line in the log file. - resultDic = {} + TODO(wangwei) format the log string to make them uniform for easy parsing + Another approach is creating a protobuf message for metrics, which can be + used for dumping metrics to string and loading perf string back to messages. 
+ """ + + resultDic = {} for line in lines: line = re.findall(r'[\w|*.*]+', line) if 'Train' in line: step = line[line.index('step')+1] if 'accuracy' in line: - resultDic.setdefault(step,{})['acc'] = line[line.index('accuracy')+1] + resultDic.setdefault(step,{})['acc'] = line[line.index('accuracy')+1] if 'loss' in line: - resultDic.setdefault(step,{})['loss'] = line[line.index('loss')+1] + resultDic.setdefault(step,{})['loss'] = line[line.index('loss')+1] if 'ppl' in line: - resultDic.setdefault(step,{})['ppl'] = line[line.index('ppl')+1] + resultDic.setdefault(step,{})['ppl'] = line[line.index('ppl')+1] if 'Squared' in line: - resultDic.setdefault(step,{})['se'] = line[line.index('Squared')+2] + resultDic.setdefault(step,{})['se'] = line[line.index('Squared')+2] return resultDic def SingaRun(jobproto='', argv=[], execpath='', testmode=False): import singa.driver as driver d = driver.Driver() - d.InitLog(argv[0]) + d.InitLog(argv[0]) d.Init(argv) if testmode == True: d.Test(jobproto.SerializeToString()) else: d.Train(False, jobproto.SerializeToString()) + # Get the performance from the latest log file. + # TODO(wangwei) the log file would be overwritten by other running instance of + # the same program, e.g., lt-singa logfile = '/tmp/singa-log/{0}.ERROR'.format(argv[0].split('/')[-1]) fin = open(logfile, 'r') result = StoreResults(fin.readlines()) - + return result def SingaRun_script(filename='', execpath=''): + """ + Deprecated. + Generate the job conf file and run the shell command. 
+ """ SINGAROOT = '../../../' conf = 'examples/' + filename if execpath=='': cmd = SINGAROOT+'bin/singa-run.sh ' \ - + '-conf %s ' % conf + + '-conf %s ' % conf else: cmd = SINGAROOT+'bin/singa-run.sh ' \ + '-conf %s ' % conf \ - + '-exec %s ' % execpath + + '-exec %s ' % execpath procs = subprocess.Popen(cmd.strip().split(' '), stdout = subprocess.PIPE, stderr = subprocess.STDOUT) - resultDic = {} + resultDic = {} outputlines = iter(procs.stdout.readline, '') resultDic = StoreResults(outputlines)
