This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new cb61dfe217 GH-48691: [C++][Parquet] Write serializer may crash if the 
value buffer is empty (#48692)
cb61dfe217 is described below

commit cb61dfe217872f64d0e7839eb34ca9bcb37f2f84
Author: Rex(Hui) An <[email protected]>
AuthorDate: Tue Jan 27 17:44:07 2026 +0800

    GH-48691: [C++][Parquet] Write serializer may crash if the value buffer is 
empty (#48692)
    
    ### Rationale for this change
    WriteArrowSerialize could unconditionally read values from the Arrow array 
even for null rows. Since a caller may provide a zero-sized 
dummy buffer for all-null arrays, this caused an ASAN heap-buffer-overflow.
    
    ### What changes are included in this PR?
    Check up front whether the array is all nulls, and only serialize it when it is not.
    
    ### Are these changes tested?
    
    Added tests.
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #48691
    
    Authored-by: rexan <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 29 +++++++++++++++++++++++
 cpp/src/parquet/column_writer.cc                  |  7 +++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc 
b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index edb59d9de3..29cc5678e4 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -5889,5 +5889,34 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
   ASSERT_RAISES(Invalid, writer->WriteTable(*table, 1));
 }
 
+TEST(TestArrowReadWrite, AllNulls) {
+  auto schema = ::arrow::schema({::arrow::field("all_nulls", 
::arrow::int8())});
+
+  constexpr int64_t length = 3;
+  ASSERT_OK_AND_ASSIGN(auto null_bitmap, ::arrow::AllocateEmptyBitmap(length));
+  auto array_data = ::arrow::ArrayData::Make(
+      ::arrow::int8(), length, {null_bitmap, /*values=*/nullptr}, 
/*null_count=*/length);
+  auto array = ::arrow::MakeArray(array_data);
+  auto record_batch = ::arrow::RecordBatch::Make(schema, length, {array});
+
+  auto sink = CreateOutputStream();
+  ASSERT_OK_AND_ASSIGN(auto writer, parquet::arrow::FileWriter::Open(
+                                        *schema, 
::arrow::default_memory_pool(), sink,
+                                        parquet::default_writer_properties(),
+                                        
parquet::default_arrow_writer_properties()));
+  ASSERT_OK(writer->WriteRecordBatch(*record_batch));
+  ASSERT_OK(writer->Close());
+  ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish());
+
+  std::shared_ptr<::arrow::Table> read_table;
+  ASSERT_OK_AND_ASSIGN(auto reader,
+                       
parquet::arrow::OpenFile(std::make_shared<BufferReader>(buffer),
+                                                
::arrow::default_memory_pool()));
+  ASSERT_OK(reader->ReadTable(&read_table));
+  auto expected_table = ::arrow::Table::Make(
+      schema, {::arrow::ArrayFromJSON(::arrow::int8(), R"([null, null, 
null])")});
+  ASSERT_TRUE(expected_table->Equals(*read_table));
+}
+
 }  // namespace arrow
 }  // namespace parquet
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 20b8cc98ca..797d435e73 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -2099,7 +2099,12 @@ Status 
TypedColumnWriterImpl<ParquetType>::WriteArrowSerialize(
   PARQUET_THROW_NOT_OK(ctx->GetScratchData<ParquetCType>(array.length(), 
&buffer));
 
   SerializeFunctor<ParquetType, ArrowType> functor;
-  RETURN_NOT_OK(functor.Serialize(checked_cast<const ArrayType&>(array), ctx, 
buffer));
+  // The value buffer could be empty if all values are nulls.
+  // The output buffer will then remain uninitialized, but that's ok since
+  // null value slots are not written in Parquet.
+  if (array.null_count() != array.length()) {
+    RETURN_NOT_OK(functor.Serialize(checked_cast<const ArrayType&>(array), 
ctx, buffer));
+  }
   bool no_nulls =
       this->descr()->schema_node()->is_required() || (array.null_count() == 0);
   if (!maybe_parent_nulls && no_nulls) {

Reply via email to