sfc-gh-ebrossard commented on code in PR #37016:
URL: https://github.com/apache/arrow/pull/37016#discussion_r1283579685
##########
cpp/src/parquet/statistics_test.cc:
##########
@@ -645,6 +645,50 @@ class TestStatisticsHasFlag : public
TestStatistics<TestType> {
EXPECT_FALSE(merged_statistics->HasDistinctCount());
EXPECT_FALSE(merged_statistics->Encode().has_distinct_count);
});
+
+ // Create a statistics object with zero distinct count. Merging preserves the
+ // distinct count if either side is zero.
+ std::shared_ptr<TypedStatistics<TestType>> statistics3;
+ std::shared_ptr<TypedStatistics<TestType>> statistics4;
+ {
+ EncodedStatistics encoded_statistics3;
+ encoded_statistics3.has_distinct_count = true;
+ encoded_statistics3.distinct_count = 0;
+ statistics3 = std::dynamic_pointer_cast<TypedStatistics<TestType>>(
+ Statistics::Make(this->schema_.Column(0), &encoded_statistics3,
+ /*num_values=*/0));
+ EXPECT_TRUE(statistics3->HasDistinctCount());
+
+ EncodedStatistics encoded_statistics4;
+ encoded_statistics4.has_distinct_count = true;
+ encoded_statistics4.distinct_count = 10;
+ statistics4 = std::dynamic_pointer_cast<TypedStatistics<TestType>>(
+ Statistics::Make(this->schema_.Column(0), &encoded_statistics4,
+ /*num_values=*/10));
+ EXPECT_TRUE(statistics4->HasDistinctCount());
+ }
+
+ // Both sides have 0 for the distinct count.
+ VerifyMergedStatistics(*statistics3, *statistics3,
+ [](TypedStatistics<TestType>* merged_statistics) {
+
EXPECT_TRUE(merged_statistics->HasDistinctCount());
Review Comment:
Sorry, these tests are still failing. I'll try to get them running locally by
following the guide at
https://arrow.apache.org/docs/developers/cpp/development.html#running-unit-tests
and figure out what I've missed here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]