[ 
https://issues.apache.org/jira/browse/ARROW-2458?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16444668#comment-16444668
 ] 

ASF GitHub Bot commented on ARROW-2458:
---------------------------------------

robertnishihara closed pull request #1893: ARROW-2458: [Plasma] Use one thread pool per PlasmaClient
URL: https://github.com/apache/arrow/pull/1893
 
 
   

This is a PR merged from a forked repository (a foreign pull request).
Because GitHub hides the original diff of such a pull request once it is
merged, the diff is reproduced below for the sake of provenance:

diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc
index 9635e70e4..0d44b1135 100644
--- a/cpp/src/plasma/client.cc
+++ b/cpp/src/plasma/client.cc
@@ -38,7 +38,6 @@
 
 #include <algorithm>
 #include <mutex>
-#include <thread>
 #include <vector>
 
 #include "arrow/buffer.h"
@@ -70,7 +69,6 @@ using arrow::MutableBuffer;
 // Number of threads used for memcopy and hash computations.
 constexpr int64_t kThreadPoolSize = 8;
 constexpr int64_t kBytesInMB = 1 << 20;
-static std::vector<std::thread> threadpool_(kThreadPoolSize);
 
 /// A Buffer class that automatically releases the backing plasma object
 /// when it goes out of scope.
@@ -122,7 +120,7 @@ static std::unordered_map<ObjectID, GpuProcessHandle*, UniqueIDHasher> gpu_objec
 static std::mutex gpu_mutex;
 #endif
 
-PlasmaClient::PlasmaClient() {
+PlasmaClient::PlasmaClient() : threadpool_(kThreadPoolSize) {
 #ifdef PLASMA_GPU
   CudaDeviceManager::GetInstance(&manager_);
 #endif
@@ -542,7 +540,7 @@ static void ComputeBlockHash(const unsigned char* data, int64_t nbytes, uint64_t
   *hash = XXH64_digest(&hash_state);
 }
 
-static inline bool compute_object_hash_parallel(XXH64_state_t* hash_state,
+bool PlasmaClient::compute_object_hash_parallel(XXH64_state_t* hash_state,
                                                 const unsigned char* data,
                                                 int64_t nbytes) {
   // Note that this function will likely be faster if the address of data is
@@ -578,7 +576,7 @@ static inline bool compute_object_hash_parallel(XXH64_state_t* hash_state,
   return true;
 }
 
-static uint64_t compute_object_hash(const ObjectBuffer& obj_buffer) {
+uint64_t PlasmaClient::compute_object_hash(const ObjectBuffer& obj_buffer) {
   DCHECK(obj_buffer.metadata);
   DCHECK(obj_buffer.data);
   XXH64_state_t hash_state;
diff --git a/cpp/src/plasma/client.h b/cpp/src/plasma/client.h
index 5787abc32..7e353b27c 100644
--- a/cpp/src/plasma/client.h
+++ b/cpp/src/plasma/client.h
@@ -25,6 +25,7 @@
 #include <functional>
 #include <memory>
 #include <string>
+#include <thread>
 #include <unordered_map>
 #include <vector>
 
@@ -40,6 +41,8 @@
 using arrow::Buffer;
 using arrow::Status;
 
+typedef struct XXH64_state_s XXH64_state_t;
+
 namespace plasma {
 
 #define PLASMA_DEFAULT_RELEASE_DELAY 64
@@ -374,6 +377,11 @@ class ARROW_EXPORT PlasmaClient {
   void increment_object_count(const ObjectID& object_id, PlasmaObject* object,
                               bool is_sealed);
 
+  bool compute_object_hash_parallel(XXH64_state_t* hash_state, const unsigned char* data,
+                                    int64_t nbytes);
+
+  uint64_t compute_object_hash(const ObjectBuffer& obj_buffer);
+
   /// File descriptor of the Unix domain socket that connects to the store.
   int store_conn_;
   /// File descriptor of the Unix domain socket that connects to the manager.
@@ -402,6 +410,8 @@ class ARROW_EXPORT PlasmaClient {
   /// information to make sure that it does not delay in releasing so much
   /// memory that the store is unable to evict enough objects to free up space.
   int64_t store_capacity_;
+  /// Threadpool for parallel memcopy and hash computation.
+  std::vector<std::thread> threadpool_;
 #ifdef PLASMA_GPU
   /// Cuda Device Manager.
   arrow::gpu::CudaDeviceManager* manager_;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Plasma] PlasmaClient uses global variable
> ------------------------------------------
>
>                 Key: ARROW-2458
>                 URL: https://issues.apache.org/jira/browse/ARROW-2458
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: Plasma (C++)
>    Affects Versions: 0.9.0
>            Reporter: Philipp Moritz
>            Assignee: Philipp Moritz
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: JS-0.4.0
>
>
> The thread pool (threadpool_) used by PlasmaClient is currently a global
> variable. This prevents us from using multiple PlasmaClients in the same
> process (e.g., one per thread).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to