pitrou commented on code in PR #45202: URL: https://github.com/apache/arrow/pull/45202#discussion_r1908831118
##########
cpp/src/parquet/size_statistics.cc:
##########
@@ -91,4 +101,87 @@ std::unique_ptr<SizeStatistics> SizeStatistics::Make(const ColumnDescriptor* des
   return size_stats;
 }
 
+std::ostream& operator<<(std::ostream& os, const SizeStatistics& size_stats) {
+  constexpr std::string_view kComma = ", ";
+  os << "SizeStatistics{";
+  std::string_view sep = "";
+  if (size_stats.unencoded_byte_array_data_bytes.has_value()) {
+    os << "unencoded_byte_array_data_bytes="
+       << *size_stats.unencoded_byte_array_data_bytes;
+    sep = kComma;
+  }
+  auto print_histogram = [&](std::string_view name,
+                             const std::vector<int64_t>& histogram) {
+    if (!histogram.empty()) {
+      os << sep << name << "={";
+      sep = kComma;
+      std::string_view value_sep = "";
+      for (int64_t v : histogram) {
+        os << value_sep << v;
+        value_sep = kComma;
+      }
+      os << "}";
+    }
+  };
+  print_histogram("repetition_level_histogram", size_stats.repetition_level_histogram);
+  print_histogram("definition_level_histogram", size_stats.definition_level_histogram);
+  os << "}";
+  return os;
+}
+
+void UpdateLevelHistogram(::arrow::util::span<const int16_t> levels,
+                          ::arrow::util::span<int64_t> histogram) {
+  const int64_t num_levels = static_cast<int64_t>(levels.size());
+  const int16_t max_level = static_cast<int16_t>(histogram.size() - 1);

Review Comment:
We could!

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org