Repository: incubator-singa
Updated Branches:
  refs/heads/master a6eea9c4a -> b00dc32fb
SINGA-148 Race condition between Worker threads and Driver

A worker thread may query its device id before the driver has set it up
(via Context). The fix makes the worker sleep until the driver finishes
the setup. All devices (GPU and CPU) must now be set up via
Context::SetupDevice; otherwise the worker would sleep forever. The Blob
math functions now check device_id < 0 to choose between the CPU and GPU
implementations.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/96794175
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/96794175
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/96794175

Branch: refs/heads/master
Commit: 9679417509baf62e0c565e0cec140844b778d827
Parents: 259422c
Author: Wei Wang <[email protected]>
Authored: Tue Mar 29 11:49:01 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Tue Mar 29 11:49:01 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/context.h   |  2 +-
 include/singa/utils/math_blob.h | 44 ++++++++++++++++++------------------
 src/worker.cc                   |  7 +++++-
 3 files changed, 29 insertions(+), 24 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96794175/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index b1128c1..3490d29 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -113,7 +113,7 @@ class Context {
     if (device_id_.find(tid) != device_id_.end())
       return device_id_[tid];
     else
-      return -1;
+      return -2;
   }
   /**
    * Setup the CPU thread, which may be assigned a GPU device.
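[A minimal sketch of the device-id convention after this change; it is an
illustration, not part of the commit, and Dispatch is a hypothetical name.
-2 marks a thread whose device the driver has not yet set up, -1 marks a
CPU thread, and ids >= 0 name GPU devices. Every Blob math function in the
diff below follows the same dispatch pattern, so a single device < 0 test
keeps both the unset and the CPU case on the CPU path:]

  // Illustration only: the dispatch rule used by the math functions in
  // math_blob.h after this commit.
  void Dispatch(int device) {
    if (device < 0) {
      // device == -1 (CPU) or -2 (not yet set up): run the CPU kernel,
      // e.g. cpu_scale(...)
    } else {
  #ifdef USE_GPU
      // device >= 0: run the CUDA kernel on that GPU, e.g. gpu_scale(...)
  #endif  // USE_GPU
    }
  }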
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96794175/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 778824e..93967b4 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -46,7 +46,7 @@ template<typename Dtype>
 void Scale(Dtype alpha, Blob<Dtype> * B) {
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_scale(B->count(), alpha, B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -64,7 +64,7 @@ void AXPY(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
   CHECK_EQ(A.count(), B->count());
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_axpy(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -104,7 +104,7 @@ void GEMV(Dtype alpha, Dtype beta, const Blob<Dtype>& A,
   bool TranA = A.transpose();
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_gemv(A.cpu_data(), B.cpu_data(), m, n, alpha, beta, TranA,
         C->mutable_cpu_data());
   } else {
@@ -169,7 +169,7 @@ void GEMM(Dtype alpha, Dtype beta, const Blob<Dtype>& A, const Blob<Dtype>& B,
   bool TranB = B.transpose();
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, k, alpha, beta, TranA, TranB,
         C->mutable_cpu_data());
   } else {
@@ -212,7 +212,7 @@ Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) {
   int n = A.count();
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
   } else {
 #ifdef USE_GPU
@@ -241,7 +241,7 @@ void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) {
   CHECK_EQ(C->count(), m * n);
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, 1, 0, false, false,
         C->mutable_cpu_data());
   } else {
@@ -262,7 +262,7 @@ void Map(const Blob<Dtype> & A, Blob<Dtype> * B) {
   CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_e_f<Op>(A.count(), A.cpu_data(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -285,7 +285,7 @@ void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) {
   //cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -304,7 +304,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
   CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -324,7 +324,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>& B,
   CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B.cpu_data(),
         C->mutable_cpu_data());
   } else {
@@ -346,7 +346,7 @@ void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) {
   CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     std::copy(A.cpu_data(), A.cpu_data() + A.count(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -474,7 +474,7 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
   one.SetValue(1);
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_gemm(A.cpu_data(), one.cpu_data(), m, n, 1, alpha, beta, false, false,
         B->mutable_cpu_data());
   } else {
@@ -511,7 +511,7 @@ void MVAddRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
   int n = A.count(), m = B->count() / n;
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     Blob<Dtype> one(m);
     one.SetValue(1);
     cpu_gemm(one.cpu_data(), A.cpu_data(), m, n, 1, alpha, beta,
@@ -566,7 +566,7 @@ void MVSumCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
   int m = B->count(), n = A.count() / m;
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     Blob<Dtype> one(n);
     one.SetValue(1);
     cpu_gemm(A.cpu_data(), one.cpu_data(), m, 1, n, alpha, beta,
@@ -591,7 +591,7 @@ void MVSumRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
   int n = B->count(), m = A.count() / n;
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     Blob<Dtype> one(m);
     one.SetValue(1);
     cpu_gemm(one.cpu_data(), A.cpu_data(), 1, n, m, alpha, beta, false,
@@ -615,7 +615,7 @@ void Reduce2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
   int m = B->count(), n = A.count() / m;
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_reduce_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -635,7 +635,7 @@ void Expand2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
   int m = A.count(), n = B->count() / m;
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_expand_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
@@ -653,7 +653,7 @@ Dtype Asum(const Blob<Dtype>& A) {
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
   Dtype ret = Dtype(0);
-  if (device == -1) {
+  if (device < 0) {
     ret = cpu_asum(A.count(), A.cpu_data(), 1) / A.count();
   } else {
 #ifdef USE_GPU
@@ -671,7 +671,7 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
   auto context = Singleton<Context>::Instance();
   const auto& thread = std::this_thread::get_id();
   int device = context->device_id(thread);
-  if (device == -1) {
+  if (device < 0) {
     cpu_sample_uniform(*context->rand_generator(thread), A->count(), low, high,
         A->mutable_cpu_data());
   } else {
@@ -689,7 +689,7 @@ void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) {
   auto context = Singleton<Context>::Instance();
   const auto& thread = std::this_thread::get_id();
   int device = context->device_id(thread);
-  if (device == -1) {
+  if (device < 0) {
     cpu_sample_gaussian(*context->rand_generator(thread), A->count(), mean, std,
         A->mutable_cpu_data());
   } else {
@@ -708,7 +708,7 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
   CHECK_EQ(A.count(), B->count());
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
         B->mutable_cpu_data());
   } else {
@@ -721,7 +721,7 @@ template<typename Dtype>
 void Zero(Blob<Dtype>* B) {
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
-  if (device == -1) {
+  if (device < 0) {
     B->SetValue(0);
   } else {
 #ifdef USE_GPU

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96794175/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index 2afa8b0..1e35ff9 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -64,7 +64,12 @@ Worker::~Worker() {
 void Worker::Run() {
   // setup gpu device
   auto context = Singleton<Context>::Instance();
-  int device = context->device_id(std::this_thread::get_id());
+  // TODO(wangwei) -2 for uninitialized device; -1 for CPU; >= 0 for GPU now.
+  int device = -2;
+  while (device == -2) {
+    device = context->device_id(std::this_thread::get_id());
+    std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+  }
   LOG(ERROR) << "Worker (group = " << grp_id_ << ", id = " << id_ << ") "
       << " start on " << (device >= 0 ? "GPU " + std::to_string(device) : "CPU");
   if (device >= 0)
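[A minimal sketch of the intended driver/worker handshake, for reference
only: the SetupDevice signature below is assumed from the header comment in
context.h and may differ from the actual declaration, and both helper
function names are hypothetical.]

  #include <chrono>
  #include <thread>

  // Driver side: register each worker thread's device before the worker
  // needs it. Passing -1 assigns the thread to CPU; did >= 0 binds a GPU.
  // (Assumed call: Context::SetupDevice(const std::thread::id&, int).)
  void SetupWorkerDevice(Context* context, std::thread::id tid, int did) {
    context->SetupDevice(tid, did);
  }

  // Worker side: mirrors the loop added to Worker::Run() above. The id
  // stays -2 until the driver's SetupDevice call takes effect.
  int WaitForDevice(Context* context) {
    int device = -2;
    while (device == -2) {
      device = context->device_id(std::this_thread::get_id());
      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    }
    return device;  // -1: CPU, >= 0: GPU
  }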
