pitrou commented on code in PR #45202:
URL: https://github.com/apache/arrow/pull/45202#discussion_r1908281821
##########
cpp/src/parquet/size_statistics.cc:
##########
@@ -91,4 +100,83 @@ std::unique_ptr<SizeStatistics> SizeStatistics::Make(const
ColumnDescriptor* des
return size_stats;
}
+std::ostream& operator<<(std::ostream& os, const SizeStatistics& size_stats) {
+ constexpr std::string_view kComma = ", ";
+ os << "SizeStatistics{";
+ std::string_view sep = "";
+ if (size_stats.unencoded_byte_array_data_bytes.has_value()) {
+ os << "unencoded_byte_array_data_bytes="
+ << *size_stats.unencoded_byte_array_data_bytes;
+ sep = kComma;
+ }
+ auto print_histogram = [&](std::string_view name,
+ const std::vector<int64_t>& histogram) {
+ if (!histogram.empty()) {
+ os << sep << name << "={";
+ sep = kComma;
+ std::string_view value_sep = "";
+ for (int64_t v : histogram) {
+ os << value_sep << v;
+ value_sep = kComma;
+ }
+ os << "}";
+ }
+ };
+ print_histogram("repetition_level_histogram",
size_stats.repetition_level_histogram);
+ print_histogram("definition_level_histogram",
size_stats.definition_level_histogram);
+ os << "}";
+ return os;
+}
+
+void UpdateLevelHistogram(::arrow::util::span<const int16_t> levels,
+ ::arrow::util::span<int64_t> histogram) {
+ const int64_t num_levels = static_cast<int64_t>(levels.size());
+ const int16_t max_level = static_cast<int16_t>(histogram.size() - 1);
+ if (max_level == 0) {
+ histogram[0] += num_levels;
+ return;
+ }
+ // The goal of the two specialized paths below is to accelerate common cases
+ // by keeping histogram values in registers.
+ // The fallback implementation (`++histogram[level]`) issues a series of
+ // load-stores with frequent conflicts.
+ if (max_level == 1) {
+ // Specialize the common case for non-repeated non-nested columns
+ // by keeping histogram values in a register, which avoids being limited
+ // by CPU cache latency.
+ int64_t hist0 = 0;
+ for (int16_t level : levels) {
+ ARROW_DCHECK_LE(level, max_level);
+ hist0 += (level == 0);
Review Comment:
> This may have a clang-tidy issue for the implicit bool to integer
> conversion:
> https://clang.llvm.org/extra/clang-tidy/checks/readability/implicit-bool-conversion.html

Do we use clang-tidy? The lint step has passed.
##########
cpp/src/parquet/size_statistics.cc:
##########
@@ -91,4 +100,83 @@ std::unique_ptr<SizeStatistics> SizeStatistics::Make(const
ColumnDescriptor* des
return size_stats;
}
+std::ostream& operator<<(std::ostream& os, const SizeStatistics& size_stats) {
+ constexpr std::string_view kComma = ", ";
+ os << "SizeStatistics{";
+ std::string_view sep = "";
+ if (size_stats.unencoded_byte_array_data_bytes.has_value()) {
+ os << "unencoded_byte_array_data_bytes="
+ << *size_stats.unencoded_byte_array_data_bytes;
+ sep = kComma;
+ }
+ auto print_histogram = [&](std::string_view name,
+ const std::vector<int64_t>& histogram) {
+ if (!histogram.empty()) {
+ os << sep << name << "={";
+ sep = kComma;
+ std::string_view value_sep = "";
+ for (int64_t v : histogram) {
+ os << value_sep << v;
+ value_sep = kComma;
+ }
+ os << "}";
+ }
+ };
+ print_histogram("repetition_level_histogram",
size_stats.repetition_level_histogram);
+ print_histogram("definition_level_histogram",
size_stats.definition_level_histogram);
+ os << "}";
+ return os;
+}
+
+void UpdateLevelHistogram(::arrow::util::span<const int16_t> levels,
+ ::arrow::util::span<int64_t> histogram) {
+ const int64_t num_levels = static_cast<int64_t>(levels.size());
+ const int16_t max_level = static_cast<int16_t>(histogram.size() - 1);
+ if (max_level == 0) {
+ histogram[0] += num_levels;
+ return;
+ }
+ // The goal of the two specialized paths below is to accelerate common cases
+ // by keeping histogram values in registers.
+ // The fallback implementation (`++histogram[level]`) issues a series of
+ // load-stores with frequent conflicts.
+ if (max_level == 1) {
+ // Specialize the common case for non-repeated non-nested columns
+ // by keeping histogram values in a register, which avoids being limited
+ // by CPU cache latency.
+ int64_t hist0 = 0;
+ for (int16_t level : levels) {
+ ARROW_DCHECK_LE(level, max_level);
+ hist0 += (level == 0);
Review Comment:
> Since levels are either 0 or 1 in this case, is it faster to compute the
> sum of levels as histogram[1]?

I'm not sure I understand your comment. Are you suggesting a particular
change?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]