This is an automated email from the ASF dual-hosted git repository.
wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 3589dc2a5b GH-50010: [C++][Parquet] Fix undefined behavior in
TypedColumnWriterImpl::UpdateLevelHistogram (#50011)
3589dc2a5b is described below
commit 3589dc2a5bcf77e087cd40bd502490f90eba8017
Author: Zehua Zou <[email protected]>
AuthorDate: Tue May 26 10:31:31 2026 +0800
GH-50010: [C++][Parquet] Fix undefined behavior in
TypedColumnWriterImpl::UpdateLevelHistogram (#50011)
### Rationale for this change
Fix undefined behavior in `TypedColumnWriterImpl::UpdateLevelHistogram`.
### What changes are included in this PR?
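It fixes the undefined behavior by delaying the `std::span` construction: the `add_levels` lambda now takes the raw levels pointer and `num_levels`, and builds the span only after its `max_level == 0` early return, instead of each call site constructing the span up front.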
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* GitHub Issue: #50010
Authored-by: Zehua Zou <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
cpp/src/parquet/column_writer.cc | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index b3ed46ee2d..653f28f64b 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1807,20 +1807,19 @@ class TypedColumnWriterImpl : public ColumnWriterImpl,
return;
}
- auto add_levels = [](std::vector<int64_t>& level_histogram,
- std::span<const int16_t> levels, int16_t max_level) {
+ auto add_levels = [](std::vector<int64_t>& level_histogram, const int16_t*
levels,
+ int64_t num_levels, int16_t max_level) {
if (max_level == 0) {
return;
}
ARROW_DCHECK_EQ(static_cast<size_t>(max_level) + 1,
level_histogram.size());
- ::parquet::UpdateLevelHistogram(levels, level_histogram);
+ std::span<const int16_t> level_span{levels,
static_cast<size_t>(num_levels)};
+ ::parquet::UpdateLevelHistogram(level_span, level_histogram);
};
- add_levels(page_size_statistics_->definition_level_histogram,
- {def_levels, static_cast<size_t>(num_levels)},
+ add_levels(page_size_statistics_->definition_level_histogram, def_levels,
num_levels,
descr_->max_definition_level());
- add_levels(page_size_statistics_->repetition_level_histogram,
- {rep_levels, static_cast<size_t>(num_levels)},
+ add_levels(page_size_statistics_->repetition_level_histogram, rep_levels,
num_levels,
descr_->max_repetition_level());
}