[ 
https://issues.apache.org/jira/browse/ARROW-1812?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16253671#comment-16253671
 ] 

ASF GitHub Bot commented on ARROW-1812:
---------------------------------------

wesm closed pull request #1322: ARROW-1812: [C++] Plasma store modifies hash 
table while iterating during client disconnect
URL: https://github.com/apache/arrow/pull/1322
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc
index 31033ccbb..7094aed6f 100644
--- a/cpp/src/plasma/store.cc
+++ b/cpp/src/plasma/store.cc
@@ -460,14 +460,22 @@ void PlasmaStore::disconnect_client(int client_fd) {
   ARROW_LOG(INFO) << "Disconnecting client on fd " << client_fd;
   // If this client was using any objects, remove it from the appropriate
   // lists.
+  // TODO(swang): Avoid iteration through the object table.
   auto client = it->second.get();
+  std::vector<ObjectID> unsealed_objects;
   for (const auto& entry : store_info_.objects) {
     if (entry.second->state == PLASMA_SEALED) {
       remove_client_from_object_clients(entry.second.get(), client);
     } else {
-      abort_object(entry.first, client);
+      // Add unsealed objects to a temporary list of object IDs. Do not perform
+      // the abort here, since it potentially modifies the object table.
+      unsealed_objects.push_back(entry.first);
     }
   }
+  // If the client was creating any objects, abort them.
+  for (const auto& entry : unsealed_objects) {
+    abort_object(entry, client);
+  }
 
   // Note, the store may still attempt to send a message to the disconnected
   // client (for example, when an object ID that the client was waiting for
diff --git a/cpp/src/plasma/test/client_tests.cc 
b/cpp/src/plasma/test/client_tests.cc
index d4285f898..f44ed2510 100644
--- a/cpp/src/plasma/test/client_tests.cc
+++ b/cpp/src/plasma/test/client_tests.cc
@@ -209,6 +209,59 @@ TEST_F(TestPlasmaStore, MultipleClientTest) {
   ASSERT_EQ(has_object, true);
 }
 
+TEST_F(TestPlasmaStore, ManyObjectTest) {
+  // Create many objects on the first client. Seal one third, abort one third,
+  // and leave the last third unsealed.
+  std::vector<ObjectID> object_ids;
+  for (int i = 0; i < 100; i++) {
+    ObjectID object_id = ObjectID::from_random();
+    object_ids.push_back(object_id);
+
+    // Test for object non-existence on the first client.
+    bool has_object;
+    ARROW_CHECK_OK(client_.Contains(object_id, &has_object));
+    ASSERT_EQ(has_object, false);
+
+    // Test for the object being in local Plasma store.
+    // First create and seal object on the first client.
+    int64_t data_size = 100;
+    uint8_t metadata[] = {5};
+    int64_t metadata_size = sizeof(metadata);
+    uint8_t* data;
+    ARROW_CHECK_OK(client_.Create(object_id, data_size, metadata, 
metadata_size, &data));
+
+    if (i % 3 == 0) {
+      // Seal one third of the objects.
+      ARROW_CHECK_OK(client_.Seal(object_id));
+      // Test that the first client can get the object.
+      ARROW_CHECK_OK(client_.Contains(object_id, &has_object));
+      ASSERT_EQ(has_object, true);
+    } else if (i % 3 == 1) {
+      // Abort one third of the objects.
+      ARROW_CHECK_OK(client_.Release(object_id));
+      ARROW_CHECK_OK(client_.Abort(object_id));
+    }
+  }
+  // Disconnect the first client. All unsealed objects should be aborted.
+  ARROW_CHECK_OK(client_.Disconnect());
+
+  // Check that the second client can query the object store for the first
+  // client's objects.
+  int i = 0;
+  for (auto const& object_id : object_ids) {
+    bool has_object;
+    ARROW_CHECK_OK(client2_.Contains(object_id, &has_object));
+    if (i % 3 == 0) {
+      // The first third should be sealed.
+      ASSERT_EQ(has_object, true);
+    } else {
+      // The rest were aborted, so the object is not in the store.
+      ASSERT_EQ(has_object, false);
+    }
+    i++;
+  }
+}
+
 }  // namespace plasma
 
 int main(int argc, char** argv) {


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Plasma store modifies hash table while iterating during client disconnect
> -------------------------------------------------------------------------
>
>                 Key: ARROW-1812
>                 URL: https://issues.apache.org/jira/browse/ARROW-1812
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Plasma (C++)
>    Affects Versions: 0.8.0
>            Reporter: Stephanie Wang
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> ARROW-1788 introduces a bug where, when a client disconnects, we potentially 
> modify the plasma store's hash table of objects while iterating through it.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to