[GitHub] [arrow] bkietz commented on a change in pull request #10233: ARROW-12641: [C++] Provide thread id accessors

GitBox Tue, 04 May 2021 07:45:22 -0700


bkietz commented on a change in pull request #10233:
URL: https://github.com/apache/arrow/pull/10233#discussion_r625846193




##########
File path: cpp/src/arrow/util/thread_pool_test.cc
##########
@@ -512,6 +512,98 @@ TEST_F(TestThreadPool, Submit) {
   }
 }
 
+TEST_F(TestThreadPool, GetCurrentThreadPool) {
+  ASSERT_EQ(ThreadPool::GetCurrentThreadPool(), nullptr);
+
+  auto pool = this->MakeThreadPool(5);
+
+  std::vector<Future<>> futures(1000);
+
+  for (size_t i = 0; i < futures.size(); ++i) {
+    ASSERT_OK_AND_ASSIGN(futures[i], pool->Submit([i, pool] {
+      if (ThreadPool::GetCurrentThreadPool() == pool.get()) {
+        return Status::OK();
+      }
+      return Status::Invalid("Task #", i, " did not point to the associated 
ThreadPool");
+    }));
+  }
+
+  ASSERT_OK(AllComplete(futures).status());
+  ASSERT_OK(pool->Shutdown());
+}
+
+TEST_F(TestThreadPool, GetCurrentThreadIndex) {
+  ASSERT_EQ(ThreadPool::GetCurrentThreadIndex(), 0);
+
+  constexpr int capacity = 5;
+
+  auto pool = this->MakeThreadPool(capacity);
+
+  std::vector<Future<>> futures(1000);
+  std::vector<util::optional<std::thread::id>> std_ids(capacity);
+
+  for (size_t i = 0; i < futures.size(); ++i) {
+    ASSERT_OK_AND_ASSIGN(futures[i], pool->Submit([&std_ids, i] {
+      auto id = ThreadPool::GetCurrentThreadIndex();
+      if (!std_ids[id].has_value()) {
+        std_ids[id] = std::this_thread::get_id();
+        return Status::OK();
+      }
+
+      if (std_ids[id] == std::this_thread::get_id()) {
+        return Status::OK();
+      }
+
+      return Status::Invalid("Task #", i, " did not point to the associated 
ThreadPool");
+    }));
+  }
+
+  ASSERT_OK(AllComplete(futures).status());
+  ASSERT_OK(pool->Shutdown());
+}
+
+TEST_F(TestThreadPool, ParallelSummationWithThreadLocalState) {
+  // Sum all integers in [0, 1000000) in parallel using thread local sums.
+  constexpr int kThreadPoolCapacity = 5;
+  constexpr int kBatchSize = 1000;
+  constexpr int kBatchCount = 1000;
+
+  auto pool = this->MakeThreadPool(kThreadPoolCapacity);
+
+  std::vector<std::unique_ptr<int64_t>> local_sums(kThreadPoolCapacity);

Review comment:
       In this example per-thread state is artificially cheap; we could also 
have a per-task sum without much change in performance and without invoking 
such roundabout storage. In general, state can be arbitrarily expensive, for 
example a hash table used in dictionary encoding. (I can write a parallel dict 
encode example too, if that'd be of interest.)
   
   Whenever construction/maintenance of state is expensive, we'd prefer to 
reuse it and keep the number of instances to a minimum.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] bkietz commented on a change in pull request #10233: ARROW-12641: [C++] Provide thread id accessors

Reply via email to