Copilot commented on code in PR #49462:
URL: https://github.com/apache/arrow/pull/49462#discussion_r3008985655
##########
cpp/src/arrow/json/reader_test.cc:
##########
@@ -290,6 +290,39 @@ TEST(ReaderTest, MultipleChunksParallel) {
AssertTablesEqual(*serial, *threaded);
}
+// Regression test for intermittent threading crashes on MinGW.
+// Run this test multiple times manually to stress-test:
+// while build/debug/arrow-json-test \
+// --gtest_filter=ReaderTest.MultipleChunksParallelRegression; do :; done
+// See https://github.com/apache/arrow/issues/49272
+
+// Helper used by multiple-chunk parallel tests to build the JSON input,
+// configure read options and read the resulting table.
+static Result<std::shared_ptr<Table>> ReadMultipleChunksParallelTable(
+ int64_t count, bool use_threads, const ParseOptions& parse_options) {
+ ReadOptions read_options;
+ read_options.block_size = static_cast<int>(count / 2);
+ read_options.use_threads = use_threads;
+
+ std::string json;
+ json.reserve(static_cast<size_t>(count) * 16); // rough reserve to avoid
reallocations
+ for (int64_t i = 0; i < count; ++i) {
+ json += "{\"a\":" + std::to_string(i) + "}\n";
+ }
+
+ return ReadToTable(std::move(json), read_options, parse_options);
+}
+
+TEST(ReaderTest, MultipleChunksParallelRegression) {
+ int64_t count = 1 << 10;
+ ParseOptions parse_options;
+ parse_options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
+
+ ASSERT_OK_AND_ASSIGN(auto table, ReadMultipleChunksParallelTable(
+ count, /*use_threads=*/true,
parse_options));
+ ASSERT_EQ(table->num_rows(), count);
Review Comment:
The PR description mentions adding a stress test that runs the parallel JSON
read 20 times, but this new test only executes the path once and relies on a
manual while-loop for stress. Either update the test to perform the intended
repeated iterations (and potentially rename accordingly), or update the PR
description/comments so they match the actual behavior.
##########
cpp/src/arrow/util/thread_pool_test.cc:
##########
@@ -660,6 +661,34 @@ TEST_F(TestThreadPool, OwnsCurrentThread) {
ASSERT_FALSE(one_failed);
}
+#ifdef _WIN32
+TEST_F(TestThreadPool, OwnsThisThreadPreservesLastError) {
+# ifndef ARROW_ENABLE_THREADING
+ GTEST_SKIP() << "Test requires threading support";
+# endif
+ auto pool = this->MakeThreadPool(4);
+
+ // Verify from outside the pool: OwnsThisThread() must not clobber
+ // the calling thread's last-error state.
+ ::SetLastError(ERROR_FILE_NOT_FOUND);
+ ASSERT_FALSE(pool->OwnsThisThread());
+ ASSERT_EQ(::GetLastError(), static_cast<DWORD>(ERROR_FILE_NOT_FOUND));
+
+ // Verify from inside a pool thread.
+ std::atomic<bool> error_preserved{true};
+ ASSERT_OK(pool->Spawn([&] {
+ ::SetLastError(ERROR_ACCESS_DENIED);
+ ASSERT_TRUE(pool->OwnsThisThread());
Review Comment:
Avoid using gtest fatal assertions (ASSERT_*) inside the spawned worker
thread. If the assertion fires it will be reported from a background thread and
can be harder to diagnose/debug; it’s also unnecessary here since the test
already uses an atomic to report failures back to the main thread. Prefer
replacing the ASSERT_TRUE call with a normal conditional that flips the atomic,
then assert on the atomic after Shutdown().
```suggestion
if (!pool->OwnsThisThread()) {
error_preserved = false;
return;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]