bkietz commented on a change in pull request #10233:
URL: https://github.com/apache/arrow/pull/10233#discussion_r625846193
##########
File path: cpp/src/arrow/util/thread_pool_test.cc
##########
@@ -512,6 +512,98 @@ TEST_F(TestThreadPool, Submit) {
}
}
+TEST_F(TestThreadPool, GetCurrentThreadPool) {
+ ASSERT_EQ(ThreadPool::GetCurrentThreadPool(), nullptr);
+
+ auto pool = this->MakeThreadPool(5);
+
+ std::vector<Future<>> futures(1000);
+
+ for (size_t i = 0; i < futures.size(); ++i) {
+ ASSERT_OK_AND_ASSIGN(futures[i], pool->Submit([i, pool] {
+ if (ThreadPool::GetCurrentThreadPool() == pool.get()) {
+ return Status::OK();
+ }
+ return Status::Invalid("Task #", i, " did not point to the associated
ThreadPool");
+ }));
+ }
+
+ ASSERT_OK(AllComplete(futures).status());
+ ASSERT_OK(pool->Shutdown());
+}
+
+TEST_F(TestThreadPool, GetCurrentThreadIndex) {
+ ASSERT_EQ(ThreadPool::GetCurrentThreadIndex(), 0);
+
+ constexpr int capacity = 5;
+
+ auto pool = this->MakeThreadPool(capacity);
+
+ std::vector<Future<>> futures(1000);
+ std::vector<util::optional<std::thread::id>> std_ids(capacity);
+
+ for (size_t i = 0; i < futures.size(); ++i) {
+ ASSERT_OK_AND_ASSIGN(futures[i], pool->Submit([&std_ids, i] {
+ auto id = ThreadPool::GetCurrentThreadIndex();
+ if (!std_ids[id].has_value()) {
+ std_ids[id] = std::this_thread::get_id();
+ return Status::OK();
+ }
+
+ if (std_ids[id] == std::this_thread::get_id()) {
+ return Status::OK();
+ }
+
+ return Status::Invalid("Task #", i, " did not point to the associated
ThreadPool");
+ }));
+ }
+
+ ASSERT_OK(AllComplete(futures).status());
+ ASSERT_OK(pool->Shutdown());
+}
+
+TEST_F(TestThreadPool, ParallelSummationWithThreadLocalState) {
+ // Sum all integers in [0, 1000000) in parallel using thread local sums.
+ constexpr int kThreadPoolCapacity = 5;
+ constexpr int kBatchSize = 1000;
+ constexpr int kBatchCount = 1000;
+
+ auto pool = this->MakeThreadPool(kThreadPoolCapacity);
+
+ std::vector<std::unique_ptr<int64_t>> local_sums(kThreadPoolCapacity);
Review comment:
In this example, per-thread state is artificially cheap; we could also
have a per-task sum without much change in performance and without invoking
such roundabout storage. In general, state can be arbitrarily expensive, for
example a hash table used in dictionary encoding. (I can write a parallel
dictionary-encoding example too, if that'd be of interest.)
Whenever construction/maintenance of state is expensive, we'd prefer to
reuse it and keep the number of instances to a minimum.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]