pitrou commented on code in PR #45202:
URL: https://github.com/apache/arrow/pull/45202#discussion_r1908831118
##########
cpp/src/parquet/size_statistics.cc:
##########
@@ -91,4 +101,87 @@ std::unique_ptr<SizeStatistics> SizeStatistics::Make(const ColumnDescriptor* des
return size_stats;
}
+std::ostream& operator<<(std::ostream& os, const SizeStatistics& size_stats) {
+ constexpr std::string_view kComma = ", ";
+ os << "SizeStatistics{";
+ std::string_view sep = "";
+ if (size_stats.unencoded_byte_array_data_bytes.has_value()) {
+ os << "unencoded_byte_array_data_bytes="
+ << *size_stats.unencoded_byte_array_data_bytes;
+ sep = kComma;
+ }
+ auto print_histogram = [&](std::string_view name,
+ const std::vector<int64_t>& histogram) {
+ if (!histogram.empty()) {
+ os << sep << name << "={";
+ sep = kComma;
+ std::string_view value_sep = "";
+ for (int64_t v : histogram) {
+ os << value_sep << v;
+ value_sep = kComma;
+ }
+ os << "}";
+ }
+ };
+ print_histogram("repetition_level_histogram", size_stats.repetition_level_histogram);
+ print_histogram("definition_level_histogram", size_stats.definition_level_histogram);
+ os << "}";
+ return os;
+}
+
+void UpdateLevelHistogram(::arrow::util::span<const int16_t> levels,
+ ::arrow::util::span<int64_t> histogram) {
+ const int64_t num_levels = static_cast<int64_t>(levels.size());
+ const int16_t max_level = static_cast<int16_t>(histogram.size() - 1);
Review Comment:
We could!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]