This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5750e2932f GH-47664: [C++][Parquet] add num_rows_ before each call to 
RowGroupWriter::Close in FileSerializer (#47665)
5750e2932f is described below

commit 5750e2932fc26c27be92fe9262f6b128a513abca
Author: Zehua Zou <[email protected]>
AuthorDate: Thu Oct 9 21:54:01 2025 +0800

    GH-47664: [C++][Parquet] add num_rows_ before each call to 
RowGroupWriter::Close in FileSerializer (#47665)
    
    ### Rationale for this change
    
    Fix wrong result of `num_rows()` method in `FileSerializer`.
    
    ### What changes are included in this PR?
    
    1. Add the closing row group's row count to `num_rows_` before each call to 
`RowGroupWriter::Close` in `FileSerializer`.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Now `num_rows()` will return the correct result, which is the number of rows 
in the yet started RowGroups.
    
    * GitHub Issue: #47664
    
    Authored-by: Zehua Zou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/parquet/file_serialize_test.cc | 2 ++
 cpp/src/parquet/file_writer.cc         | 1 +
 2 files changed, 3 insertions(+)

diff --git a/cpp/src/parquet/file_serialize_test.cc 
b/cpp/src/parquet/file_serialize_test.cc
index fc356d5d24..f287e493a9 100644
--- a/cpp/src/parquet/file_serialize_test.cc
+++ b/cpp/src/parquet/file_serialize_test.cc
@@ -76,6 +76,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
     for (int rg = 0; rg < num_rowgroups_ / 2; ++rg) {
       RowGroupWriter* row_group_writer;
       row_group_writer = file_writer->AppendRowGroup();
+      EXPECT_EQ(rows_per_rowgroup_ * rg, file_writer->num_rows());
       for (int col = 0; col < num_columns_; ++col) {
         auto column_writer =
             
static_cast<TypedColumnWriter<TestType>*>(row_group_writer->NextColumn());
@@ -97,6 +98,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
     for (int rg = 0; rg < num_rowgroups_ / 2; ++rg) {
       RowGroupWriter* row_group_writer;
       row_group_writer = file_writer->AppendBufferedRowGroup();
+      EXPECT_EQ(rows_per_rowgroup_ * (rg + num_rowgroups_ / 2), 
file_writer->num_rows());
       for (int batch = 0; batch < (rows_per_rowgroup_ / rows_per_batch_); 
++batch) {
         for (int col = 0; col < num_columns_; ++col) {
           auto column_writer =
diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index 8c19aecb0d..ddec2c0a56 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -358,6 +358,7 @@ class FileSerializer : public ParquetFileWriter::Contents {
 
   RowGroupWriter* AppendRowGroup(bool buffered_row_group) {
     if (row_group_writer_) {
+      num_rows_ += row_group_writer_->num_rows();
       row_group_writer_->Close();
     }
     int16_t row_group_ordinal = -1;  // row group ordinal not set

Reply via email to