EnricoMi commented on code in PR #44990:
URL: https://github.com/apache/arrow/pull/44990#discussion_r1883691994
##########
cpp/src/arrow/dataset/file_parquet_encryption_test.cc:
##########
@@ -151,21 +167,53 @@ class DatasetEncryptionTestBase : public ::testing::Test {
// Create the dataset
ASSERT_OK_AND_ASSIGN(auto dataset, dataset_factory->Finish());
- // Reuse the dataset above to scan it twice to make sure decryption works
correctly.
- for (size_t i = 0; i < 2; ++i) {
- // Read dataset into table
- ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
- ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
- ASSERT_OK_AND_ASSIGN(auto read_table, scanner->ToTable());
-
- // Verify the data was read correctly
- ASSERT_OK_AND_ASSIGN(auto combined_table, read_table->CombineChunks());
- // Validate the table
- ASSERT_OK(combined_table->ValidateFull());
- AssertTablesEqual(*combined_table, *table_);
+ if (concurrently) {
+ // start with a single thread so we are more likely to build up a queue
of jobs
+ ASSERT_OK_AND_ASSIGN(auto pool, arrow::internal::ThreadPool::Make(1));
+ std::vector<Future<std::shared_ptr<Table>>> threads;
+
+ // Read dataset above multiple times concurrently to see that is
thread-safe.
+ for (size_t i = 0; i < 100; ++i) {
+ threads.push_back(
+ DeferNotOk(pool->Submit(DatasetEncryptionTestBase::read,
dataset)));
+ }
+
+ // ramp up parallelism
+ ASSERT_OK(pool->SetCapacity(16));
+ // ensure there are sufficient jobs to see concurrent processing
+ ASSERT_GT(pool->GetNumTasks(), 16);
Review Comment:
These tests are flaky anyway if they fail; this does not make them more
deterministic, so I removed it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]