kou commented on code in PR #49462:
URL: https://github.com/apache/arrow/pull/49462#discussion_r2893472182


##########
cpp/src/arrow/util/thread_pool.cc:
##########
@@ -26,6 +26,10 @@
 #include <thread>
 #include <vector>
 
+#if defined(__MINGW32__) || defined(__MINGW64__)

Review Comment:
   We can use `#ifdef __MINGW32__` because when `__MINGW64__` is defined, 
`__MINGW32__` is also defined.
   
   
   ```suggestion
   #ifdef __MINGW32__
   ```
   



##########
cpp/src/arrow/util/thread_pool.cc:
##########
@@ -630,9 +634,39 @@ void ThreadPool::CollectFinishedWorkersUnlocked() {
   state_->finished_workers_.clear();
 }
 
+// MinGW's __emutls implementation for C++ thread_local has known race 
conditions
+// during thread creation that can cause segfaults. Use native Win32 TLS 
instead.
+// See https://github.com/apache/arrow/issues/49272
+#  if defined(__MINGW32__) || defined(__MINGW64__)

Review Comment:
   ```suggestion
   #  ifdef __MINGW32__
   ```



##########
cpp/src/arrow/util/thread_pool.cc:
##########
@@ -630,9 +634,39 @@ void ThreadPool::CollectFinishedWorkersUnlocked() {
   state_->finished_workers_.clear();
 }
 
+// MinGW's __emutls implementation for C++ thread_local has known race 
conditions
+// during thread creation that can cause segfaults. Use native Win32 TLS 
instead.
+// See https://github.com/apache/arrow/issues/49272
+#  if defined(__MINGW32__) || defined(__MINGW64__)
+
+namespace {
+DWORD GetPoolTlsIndex() {
+  static DWORD index = [] {
+    DWORD i = TlsAlloc();
+    if (i == TLS_OUT_OF_INDEXES) {
+      ARROW_LOG(FATAL) << "TlsAlloc failed for thread pool TLS";

Review Comment:
   Can we show error details?
   
   
   ```suggestion
         ARROW_LOG(FATAL) << "TlsAlloc failed for thread pool TLS: " << 
internal::WinErrorMessage(GetLastError());
   ```



##########
cpp/src/arrow/util/thread_pool.cc:
##########
@@ -26,6 +26,10 @@
 #include <thread>
 #include <vector>
 
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#  include "arrow/util/windows_compatibility.h"
+#endif

Review Comment:
   We can always include `windows_compatibility.h` because it does nothing on 
non Windows. 
   
   ```suggestion
   #include "arrow/util/windows_compatibility.h"
   ```



##########
cpp/src/arrow/json/reader_test.cc:
##########
@@ -290,6 +290,32 @@ TEST(ReaderTest, MultipleChunksParallel) {
   AssertTablesEqual(*serial, *threaded);
 }
 
+// Stress test for intermittent threading crashes on MinGW.
+// See https://github.com/apache/arrow/issues/49272
+TEST(ReaderTest, MultipleChunksParallelStress) {
+  constexpr int kTrials = 20;
+  for (int trial = 0; trial < kTrials; ++trial) {
+    int64_t count = 1 << 10;
+    ParseOptions parse_options;
+    parse_options.unexpected_field_behavior = 
UnexpectedFieldBehavior::InferType;
+    ReadOptions read_options;
+    read_options.block_size = static_cast<int>(count / 2);
+    read_options.use_threads = true;
+
+    std::string json;
+    for (int i = 0; i < count; ++i) {
+      json += "{\"a\":" + std::to_string(i) + "}\n";
+    }
+
+    std::shared_ptr<io::InputStream> input;
+    ASSERT_OK(MakeStream(json, &input));
+    ASSERT_OK_AND_ASSIGN(auto reader, TableReader::Make(default_memory_pool(), 
input,
+                                                        read_options, 
parse_options));
+    ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
+    ASSERT_EQ(table->num_rows(), count);
+  }
+}

Review Comment:
   Is this a heavy test?
   
   If so, we don't want to this test in our CI. Can we disable it by default 
something like the following?
   
   ```cpp
   TEST(...) {
     if (!std::getenv("ARROW_SLOW_TEST_ENABLE")) {
       GTEST_SKIP() << ...;
     }
     ...
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to