HuaHuaY opened a new issue, #50010:
URL: https://github.com/apache/arrow/issues/50010

   ### Describe the bug, including details regarding any error messages, 
version, and platform.
   
   ```cpp
     void UpdateLevelHistogram(int64_t num_levels, const int16_t* def_levels,
                               const int16_t* rep_levels) const {
       if (page_size_statistics_ == nullptr) {
         return;
       }
   
       auto add_levels = [](std::vector<int64_t>& level_histogram,
                            std::span<const int16_t> levels, int16_t max_level) 
{
         if (max_level == 0) {
           return;
         }
         ARROW_DCHECK_EQ(static_cast<size_t>(max_level) + 1, 
level_histogram.size());
         ::parquet::UpdateLevelHistogram(levels, level_histogram);
       };
   
       add_levels(page_size_statistics_->definition_level_histogram,
                  {def_levels, static_cast<size_t>(num_levels)},
                  descr_->max_definition_level());
       add_levels(page_size_statistics_->repetition_level_histogram,
                  {rep_levels, static_cast<size_t>(num_levels)},
                  descr_->max_repetition_level());
     }
   ```
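   `num_levels` can be non-zero even when `descr_->max_repetition_level() == 0` 
and `rep_levels == nullptr`. In that case the `std::span` argument is 
constructed from a null pointer with a non-zero length at the call site, 
before `add_levels` gets a chance to return early on `max_level == 0`. That is 
an invalid range, which is undefined behavior according to 
https://eel.is/c++draft/views.span#span.cons-4.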
   
   Backtrace:
   ```text
   Console is in 'commands' mode, prefix expressions with '?'.
   Launching: 
/Users/huahua/github/arrow/cpp/out/build/ninja-debug/debug/parquet-arrow-index-test
   Launched process 43654 from 
'/Users/huahua/github/arrow/cpp/out/build/ninja-debug/debug/parquet-arrow-index-test'
   Stop reason: libc++: 
/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-libcxx-20.1.8-dev/include/c++/v1/span:451:
 assertion __count == 0 || std::to_address(__first) != nullptr failed: passed 
nullptr with non-zero length in span's constructor (iterator, len)
   
   bt
   * thread #1, queue = 'com.apple.main-thread', stop reason = libc++: 
/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-libcxx-20.1.8-dev/include/c++/v1/span:451:
 assertion __count == 0 || std::to_address(__first) != nullptr failed: passed 
nullptr with non-zero length in span's constructor (iterator, len)
   
     * frame #0: 0x000000010202cc84 
libparquet.2500.0.0.dylib`__clang_trap_msg$libc++$/nix/store/eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee-libcxx-20.1.8-dev/include/c++/v1/span:451:
 assertion __count == 0 || std::to_address(__first) != nullptr failed: passed 
nullptr with non-zero length in span's constructor (iterator, len)
    at span:0 [inlined]
       frame #1: 0x000000010202cc84 
libparquet.2500.0.0.dylib`std::__1::span<short const, 
18446744073709551615ul>::span[abi:fe200100]<short const*>(this=size=4, 
__first=0x0000000000000000, __count=4) at span:450:5
       frame #2: 0x000000010202ca40 
libparquet.2500.0.0.dylib`std::__1::span<short const, 
18446744073709551615ul>::span[abi:fe200100]<short const*>(this=size=4, 
__first=0x0000000000000000, __count=4) at span:449:61
       frame #3: 0x000000010202c2e8 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::UpdateLevelHistogram(this=0x0000000103bb0348,
 num_levels=4, def_levels=0x0000020000260640, rep_levels=0x0000000000000000) 
const at column_writer.cc:1823:16
       frame #4: 0x000000010202e138 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteLevelsSpaced(this=0x0000000103bb0348,
 num_levels=4, def_levels=0x0000020000260640, rep_levels=0x0000000000000000) at 
column_writer.cc:1780:5
       frame #5: 0x000000010202db34 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteBatchSpacedInternal(long
 long, short const*, short const*, unsigned char const*, long long, long long 
const*)::'lambda'(long long, long long, 
bool)::operator()(this=0x000000016fdf04d0, offset=0, batch_size=4, 
check_page=true) const at column_writer.cc:1398:7
       frame #6: 0x000000010202d42c libparquet.2500.0.0.dylib`void 
parquet::DoInBatchesNonRepeated<parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteBatchSpacedInternal(long
 long, short const*, short const*, unsigned char const*, long long, long long 
const*)::'lambda'(long long, long long, bool)&, 
parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteBatchSpacedInternal(long
 long, short const*, short const*, unsigned char const*, long long, long long 
const*)::'lambda'()>(num_levels=4, batch_size=1024, max_rows_per_page=20000, 
action=0x000000016fdf04d0, curr_page_buffered_rows=0x000000016fdf04c8) at 
column_writer.cc:1182:5
       frame #7: 0x000000010202d124 libparquet.2500.0.0.dylib`void 
parquet::DoInBatches<parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteBatchSpacedInternal(long
 long, short const*, short const*, unsigned char const*, long long, long long 
const*)::'lambda'(long long, long long, bool)&, 
parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteBatchSpacedInternal(long
 long, short const*, short const*, unsigned char const*, long long, long long 
const*)::'lambda'()>(def_levels=0x0000020000260640, 
rep_levels=0x0000000000000000, num_levels=4, batch_size=1024, 
max_rows_per_page=20000, pages_change_on_record_boundaries=true, 
action=0x000000016fdf04d0, curr_page_buffered_rows=0x000000016fdf04c8) at 
column_writer.cc:1246:5
       frame #8: 0x000000010202a310 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteBatchSpacedInternal(this=0x0000000103bb0348,
 num_values=4, def_levels=0x0000020000260640, rep_levels=0x0000000000000000, 
valid_bits="7", valid_bits_offset=0, values=0x0000020000260500) at 
column_writer.cc:1418:5
       frame #9: 0x000000010200bdac 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteArrowZeroCopy(this=0x0000000103bb0348,
 def_levels=0x0000020000260640, rep_levels=0x0000000000000000, num_levels=4, 
array=0x00000008c6c14248, ctx=0x00000008c6c0d378, maybe_parent_nulls=false) at 
column_writer.cc:1541:7
       frame #10: 0x000000010200c2f8 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteArrowDense(this=0x0000000103bb0348,
 def_levels=0x0000020000260640, rep_levels=0x0000000000000000, num_levels=4, 
array=0x00000008c6c14248, ctx=0x00000008c6c0d378, maybe_parent_nulls=false) at 
column_writer.cc:2439:7
       frame #11: 0x0000000102063ea4 
libparquet.2500.0.0.dylib`parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>>::WriteArrow(this=0x0000000103bb0348,
 def_levels=0x0000020000260640, rep_levels=0x0000000000000000, num_levels=4, 
leaf_array=0x00000008c6c14248, ctx=0x00000008c6c0d378, 
leaf_field_nullable=true) at column_writer.cc:1481:16
       frame #12: 0x0000000101f22f10 
libparquet.2500.0.0.dylib`parquet::arrow::(anonymous 
namespace)::ArrowColumnWriterV2::Write(parquet::ArrowWriteContext*)::'lambda'(parquet::arrow::MultipathLevelBuilderResult
 const&)::operator()(this=0x000000016fdf1110, result=0x000000016fdf1010) const 
at writer.cc:168:9
       frame #19: 0x0000000101dfa798 
libparquet.2500.0.0.dylib`std::__1::function<arrow::Status 
(parquet::arrow::MultipathLevelBuilderResult const&)>::operator()(this= Lambda 
in File writer.cc at Line 168, __arg=0x000000016fdf1010) const at 
function.h:995:10
       frame #20: 0x0000000101dfa210 
libparquet.2500.0.0.dylib`parquet::arrow::(anonymous 
namespace)::WritePath(root_range=(start = 0, end = 4), 
path_info=0x00000008c6c02700, arrow_context=0x00000008c6c0d378, writer= Lambda 
in File writer.cc at Line 168) at path_internal.cc:633:10
       frame #21: 0x0000000101df96c8 
libparquet.2500.0.0.dylib`parquet::arrow::MultipathLevelBuilderImpl::Write(this=0x00000008c6c02610,
 leaf_index=0, context=0x00000008c6c0d378, write_leaf_callback= Lambda in File 
writer.cc at Line 168) at path_internal.cc:873:12
       frame #22: 0x0000000101f1dde4 
libparquet.2500.0.0.dylib`parquet::arrow::(anonymous 
namespace)::ArrowColumnWriterV2::Write(this=0x00000008c6c02760, 
ctx=0x00000008c6c0d378) at writer.cc:168:9
       frame #23: 0x0000000101f19990 
libparquet.2500.0.0.dylib`parquet::arrow::FileWriterImpl::WriteColumnChunk(this=0x00000008c6c0d2c0,
 data=std::__1::shared_ptr<arrow::ChunkedArray>::element_type @ 
0x0000000103bae9f8 strong=2 weak=0, offset=0, size=4) at writer.cc:382:22
       frame #24: 0x0000000101f1c7bc 
libparquet.2500.0.0.dylib`parquet::arrow::FileWriterImpl::WriteTable(arrow::Table
 const&, long long)::'lambda'(long long, long 
long)::operator()(this=0x000000016fdf15a0, offset=0, size=4) const at 
writer.cc:411:9
       frame #25: 0x0000000101f18f98 
libparquet.2500.0.0.dylib`parquet::arrow::FileWriterImpl::WriteTable(this=0x00000008c6c0d2c0,
 table=0x0000000103baeb48, chunk_size=4) at writer.cc:424:7
       frame #26: 0x00000001000165ac 
parquet-arrow-index-test`parquet::arrow::TestingWithPageIndex::WriteFile(this=0x00000008c701c050,
 
writer_properties=std::__1::shared_ptr<parquet::WriterProperties>::element_type 
@ 0x00000008c6c0cf00 strong=5 weak=0, 
table=std::__1::shared_ptr<arrow::Table>::element_type @ 0x0000000103baeb48 
strong=1 weak=0) at index_test.cc:141:5
       frame #27: 0x0000000100013a60 
parquet-arrow-index-test`parquet::arrow::ParquetPageIndexRoundTripTest_SimpleRoundTrip_Test::TestBody(this=0x00000008c701c040)
 at index_test.cc:232:3
   ```
   
   ### Component(s)
   
   C++, Parquet


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to