pitrou commented on a change in pull request #11273:
URL: https://github.com/apache/arrow/pull/11273#discussion_r718855607



##########
File path: cpp/src/arrow/compute/kernels/vector_sort_test.cc
##########
@@ -1758,21 +1851,46 @@ TEST_P(TestTableSortIndicesRandom, Sort) {
 
   SortOptions options(sort_keys);
 
-  // Test with different table chunkings
-  for (const int64_t num_chunks : {1, 2, 20}) {
-    ARROW_SCOPED_TRACE("Table sorting: num_chunks = ", num_chunks);
-    TableBatchReader reader(*table);
-    reader.set_chunksize((length + num_chunks - 1) / num_chunks);
-    ASSERT_OK_AND_ASSIGN(auto chunked_table, 
Table::FromRecordBatchReader(&reader));
+  // Test with different, heterogenous table chunkings
+  for (const int64_t max_num_chunks : {1, 3, 15}) {
+    ARROW_SCOPED_TRACE("Table sorting: max chunks per column = ", 
max_num_chunks);
+    std::uniform_int_distribution<int64_t> num_chunk_dist(1 + max_num_chunks / 
2,
+                                                          max_num_chunks);
+    ChunkedArrayVector columns;
+    columns.reserve(fields.size());
+
+    // Chunk each column independently
+    for (const auto& factory : column_factories) {
+      const int64_t num_chunks = num_chunk_dist(engine);
+      ArrayVector chunks(num_chunks);
+      const auto offsets =
+          checked_pointer_cast<Int32Array>(rng.Offsets(num_chunks + 1, 0, 
length));
+      for (int64_t i = 0; i < num_chunks; ++i) {
+        const auto chunk_len = offsets->Value(i + 1) - offsets->Value(i);
+        chunks[i] = factory(chunk_len);
+      }
+      columns.push_back(std::make_shared<ChunkedArray>(std::move(chunks)));
+      ASSERT_EQ(columns.back()->length(), length);
+    }
+
+    auto table = Table::Make(schema, std::move(columns));
     for (auto null_placement : AllNullPlacements()) {
       ARROW_SCOPED_TRACE("null_placement = ", null_placement);
       options.null_placement = null_placement;
-      ASSERT_OK_AND_ASSIGN(auto offsets, SortIndices(Datum(*chunked_table), 
options));
+      ASSERT_OK_AND_ASSIGN(auto offsets, SortIndices(Datum(*table), options));
       Validate(*table, options, *checked_pointer_cast<UInt64Array>(offsets));
     }
   }
 
   // Also validate RecordBatch sorting
+  ChunkedArrayVector columns;
+  columns.reserve(fields.size());
+  for (const auto& factory : column_factories) {
+    columns.push_back(std::make_shared<ChunkedArray>(factory(length)));
+  }
+  auto table = Table::Make(schema, std::move(columns));
+  ASSERT_OK(table->ValidateFull());
+

Review comment:
       Hmm, good point!




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to