This is an automated email from the ASF dual-hosted git repository.

wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 3589dc2a5b GH-50010: [C++][Parquet] Fix undefined behavior in TypedColumnWriterImpl::UpdateLevelHistogram (#50011)
3589dc2a5b is described below

commit 3589dc2a5bcf77e087cd40bd502490f90eba8017
Author: Zehua Zou <[email protected]>
AuthorDate: Tue May 26 10:31:31 2026 +0800

    GH-50010: [C++][Parquet] Fix undefined behavior in TypedColumnWriterImpl::UpdateLevelHistogram (#50011)
    
    ### Rationale for this change
    
    Fix undefined behavior in `TypedColumnWriterImpl::UpdateLevelHistogram`.
    
    ### What changes are included in this PR?
    
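    It fixes the undefined behavior by delaying the span construction: the `add_levels` helper now receives the raw level pointer and the level count, and builds the `std::span` only after its `max_level == 0` early return instead of at each call site.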
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #50010
    
    Authored-by: Zehua Zou <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 cpp/src/parquet/column_writer.cc | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index b3ed46ee2d..653f28f64b 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1807,20 +1807,19 @@ class TypedColumnWriterImpl : public ColumnWriterImpl,
       return;
     }
 
-    auto add_levels = [](std::vector<int64_t>& level_histogram,
-                         std::span<const int16_t> levels, int16_t max_level) {
+    auto add_levels = [](std::vector<int64_t>& level_histogram, const int16_t* levels,
+                         int64_t num_levels, int16_t max_level) {
       if (max_level == 0) {
         return;
       }
       ARROW_DCHECK_EQ(static_cast<size_t>(max_level) + 1, level_histogram.size());
-      ::parquet::UpdateLevelHistogram(levels, level_histogram);
+      std::span<const int16_t> level_span{levels, static_cast<size_t>(num_levels)};
+      ::parquet::UpdateLevelHistogram(level_span, level_histogram);
     };
 
-    add_levels(page_size_statistics_->definition_level_histogram,
-               {def_levels, static_cast<size_t>(num_levels)},
+    add_levels(page_size_statistics_->definition_level_histogram, def_levels, num_levels,
                descr_->max_definition_level());
-    add_levels(page_size_statistics_->repetition_level_histogram,
-               {rep_levels, static_cast<size_t>(num_levels)},
+    add_levels(page_size_statistics_->repetition_level_histogram, rep_levels, num_levels,
                descr_->max_repetition_level());
   }
 

Reply via email to