Update singa-run.sh to fix a weird bug that happens when display_freq=30 and singa is executed as ./singa -model. This needs further investigation.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/921f9277 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/921f9277 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/921f9277 Branch: refs/heads/master Commit: 921f9277fc2b753fb06178f0f216d6ad051cc532 Parents: 5e111c3 Author: wang wei <[email protected]> Authored: Wed May 27 22:38:02 2015 +0800 Committer: wang wei <[email protected]> Committed: Wed May 27 22:38:02 2015 +0800 ---------------------------------------------------------------------- bin/singa-run.sh | 9 +++++---- examples/cifar10/cluster.conf | 4 ++-- examples/cifar10/model.conf | 8 ++++---- src/trainer/worker.cc | 1 + 4 files changed, 12 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/921f9277/bin/singa-run.sh ---------------------------------------------------------------------- diff --git a/bin/singa-run.sh b/bin/singa-run.sh index 2e8d340..3ee50a3 100755 --- a/bin/singa-run.sh +++ b/bin/singa-run.sh @@ -19,7 +19,7 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # */ -# +# # Run a Singa job # @@ -29,7 +29,7 @@ usage="Usage: singa-run.sh" # echo $usage # exit 1 #fi - + BIN=`dirname "${BASH_SOURCE-$0}"` BIN=`cd "$BIN">/dev/null; pwd` BASE=`cd "$BIN/..">/dev/null; pwd` @@ -46,8 +46,9 @@ sleep 3 echo starting singa ... -#echo $@ -./singa $@ +echo "./singa" $@ +#. ./singa $@ +. ./singa $@ echo stopping singa ... 
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/921f9277/examples/cifar10/cluster.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/cluster.conf b/examples/cifar10/cluster.conf index 88c3d4b..97c64fd 100644 --- a/examples/cifar10/cluster.conf +++ b/examples/cifar10/cluster.conf @@ -1,6 +1,6 @@ nworker_groups: 1 nserver_groups: 1 nservers_per_group: 1 -nworkers_per_group: 2 -nworkers_per_procs: 2 +nworkers_per_group: 1 +nworkers_per_procs: 1 workspace: "examples/cifar10/" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/921f9277/examples/cifar10/model.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/model.conf b/examples/cifar10/model.conf index 2105360..72ebf8e 100644 --- a/examples/cifar10/model.conf +++ b/examples/cifar10/model.conf @@ -1,7 +1,7 @@ name: "cifar10-convnet" -train_steps: 700 +train_steps: 70000 test_steps:100 -test_frequency:300 +test_frequency:1000 display_frequency:30 updater{ momentum:0.9 @@ -21,7 +21,7 @@ layer{ type: "kShardData" data_param { path: "examples/cifar10/cifar10_train_shard" - batchsize: 100 + batchsize: 128 } exclude: kTest } @@ -30,7 +30,7 @@ layer{ type: "kShardData" data_param { path: "examples/cifar10/cifar10_test_shard" - batchsize: 100 + batchsize: 128 } exclude: kTrain } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/921f9277/src/trainer/worker.cc ---------------------------------------------------------------------- diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc index 955ee29..abfcdf0 100644 --- a/src/trainer/worker.cc +++ b/src/trainer/worker.cc @@ -174,6 +174,7 @@ void Worker::RunOneBatch(int step, Metric* perf){ Test(test_net_, modelproto_.test_steps(), "Test"); } TrainOneBatch(step); + //LOG(ERROR)<<"Train "<<step; if(perf!=nullptr){ auto losslayers=train_net_->losslayers(); for(auto layer: losslayers){
