This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d7dc586c11 GH-45048: [C++][Parquet] Deprecate unused `chunk_size` 
parameter in `parquet::arrow::FileWriter::NewRowGroup()` (#45088)
d7dc586c11 is described below

commit d7dc586c11b5ce903cadc946c55dcacadcc85668
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Jan 15 14:30:10 2025 +0100

    GH-45048: [C++][Parquet] Deprecate unused `chunk_size` parameter in 
`parquet::arrow::FileWriter::NewRowGroup()` (#45088)
    
    
    
    ### Rationale for this change
    
    Just noticed that the implementation doesn't use the parameter.
    
    ### What changes are included in this PR?
    
    Remove the parameter from `NewRowGroup()`
    
    ### Are these changes tested?
    
    ### Are there any user-facing changes?
    
    The `chunk_size` parameter is now deprecated.
    
    * GitHub Issue: #45048
    
    Lead-authored-by: Krisztian Szucs <[email protected]>
    Co-authored-by: Raúl Cumplido <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 c_glib/parquet-glib/arrow-file-writer.cpp         |  7 ++-----
 c_glib/parquet-glib/arrow-file-writer.h           |  4 +---
 c_glib/test/parquet/test-arrow-file-writer.rb     |  4 ++--
 cpp/src/arrow/dataset/file_parquet_test.cc        |  3 +--
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 10 +++++-----
 cpp/src/parquet/arrow/writer.cc                   |  4 ++--
 cpp/src/parquet/arrow/writer.h                    |  9 +++++++--
 python/pyarrow/_parquet.pxd                       |  2 +-
 8 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp 
b/c_glib/parquet-glib/arrow-file-writer.cpp
index 2b8e2bdeac..738fb4fd82 100644
--- a/c_glib/parquet-glib/arrow-file-writer.cpp
+++ b/c_glib/parquet-glib/arrow-file-writer.cpp
@@ -574,7 +574,6 @@ 
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
 /**
  * gparquet_arrow_file_writer_new_row_group:
  * @writer: A #GParquetArrowFileWriter.
- * @chunk_size: The max number of rows in a row group.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
  * Start a new row group.
@@ -584,13 +583,11 @@ 
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
  * Since: 18.0.0
  */
 gboolean
-gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
-                                         gsize chunk_size,
-                                         GError **error)
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, 
GError **error)
 {
   auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
   return garrow::check(error,
-                       parquet_arrow_file_writer->NewRowGroup(chunk_size),
+                       parquet_arrow_file_writer->NewRowGroup(),
                        "[parquet][arrow][file-writer][new-row-group]");
 }
 
diff --git a/c_glib/parquet-glib/arrow-file-writer.h 
b/c_glib/parquet-glib/arrow-file-writer.h
index 2c82f7c1f8..4986430c95 100644
--- a/c_glib/parquet-glib/arrow-file-writer.h
+++ b/c_glib/parquet-glib/arrow-file-writer.h
@@ -135,9 +135,7 @@ 
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
 
 GPARQUET_AVAILABLE_IN_18_0
 gboolean
-gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
-                                         gsize chunk_size,
-                                         GError **error);
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, 
GError **error);
 
 GPARQUET_AVAILABLE_IN_18_0
 gboolean
diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb 
b/c_glib/test/parquet/test-arrow-file-writer.rb
index d8344bf1c5..418de4782d 100644
--- a/c_glib/test/parquet/test-arrow-file-writer.rb
+++ b/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -89,10 +89,10 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase
   def test_write_chunked_array
     schema = build_schema("enabled" => :boolean)
     writer = Parquet::ArrowFileWriter.new(schema, @file.path)
-    writer.new_row_group(2)
+    writer.new_row_group
     chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])])
     writer.write_chunked_array(chunked_array)
-    writer.new_row_group(1)
+    writer.new_row_group
     chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])])
     writer.write_chunked_array(chunked_array)
     writer.close
diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc 
b/cpp/src/arrow/dataset/file_parquet_test.cc
index 2c05dcd9be..536fcdb21c 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -85,7 +85,6 @@ class ParquetFormatHelper {
   static Status WriteRecordBatch(const RecordBatch& batch,
                                  parquet::arrow::FileWriter* writer) {
     auto schema = batch.schema();
-    auto size = batch.num_rows();
 
     if (!schema->Equals(*writer->schema(), false)) {
       return Status::Invalid("RecordBatch schema does not match this writer's. 
batch:'",
@@ -93,7 +92,7 @@ class ParquetFormatHelper {
                              "'");
     }
 
-    RETURN_NOT_OK(writer->NewRowGroup(size));
+    RETURN_NOT_OK(writer->NewRowGroup());
     for (int i = 0; i < batch.num_columns(); i++) {
       RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
     }
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc 
b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 856c032c35..cedcebbfb6 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test {
     ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
                                         MakeWriter(schema), arrow_schema,
                                         default_arrow_writer_properties(), 
&writer));
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
     ASSERT_OK_NO_THROW(writer->Close());
     // writer->Close() should be idempotent
@@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, 
SingleColumnRequiredChunkedWrite) {
                                       this->MakeWriter(schema), arrow_schema,
                                       default_arrow_writer_properties(), 
&writer));
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, 
chunk_size);
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
   }
@@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, 
SingleColumnOptionalChunkedWrite) {
                                       this->MakeWriter(schema), arrow_schema,
                                       default_arrow_writer_properties(), 
&writer));
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, 
chunk_size);
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
   }
@@ -5149,7 +5149,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public 
TestParquetIO<TestType> {
         ::arrow::default_memory_pool(),
         ParquetFileWriter::Open(this->sink_, schema_node, writer_properties),
         arrow_schema, default_arrow_writer_properties(), &writer));
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
     ASSERT_OK_NO_THROW(writer->Close());
   }
@@ -5481,7 +5481,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
   // Operations on closed writer are invalid
   ASSERT_OK(writer->Close());
 
-  ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
+  ASSERT_RAISES(Invalid, writer->NewRowGroup());
   ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
   ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
   ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());
diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
index 463713df1b..c6d86648c1 100644
--- a/cpp/src/parquet/arrow/writer.cc
+++ b/cpp/src/parquet/arrow/writer.cc
@@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter {
                                 default_arrow_reader_properties(), 
&schema_manifest_);
   }
 
-  Status NewRowGroup(int64_t chunk_size) override {
+  Status NewRowGroup() override {
     RETURN_NOT_OK(CheckClosed());
     if (row_group_writer_ != nullptr) {
       PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
@@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter {
     }
 
     auto WriteRowGroup = [&](int64_t offset, int64_t size) {
-      RETURN_NOT_OK(NewRowGroup(size));
+      RETURN_NOT_OK(NewRowGroup());
       for (int i = 0; i < table.num_columns(); i++) {
         RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
       }
diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h
index 4e1ddafd9a..e36b8f252c 100644
--- a/cpp/src/parquet/arrow/writer.h
+++ b/cpp/src/parquet/arrow/writer.h
@@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter {
   /// \brief Start a new row group.
   ///
   /// Returns an error if not all columns have been written.
+  virtual ::arrow::Status NewRowGroup() = 0;
+
+  /// \brief Start a new row group.
   ///
-  /// \param chunk_size the number of rows in the next row group.
-  virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
+  /// \deprecated Deprecated in 19.0.0.
+  ARROW_DEPRECATED(
+      "Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` 
argument.")
+  virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return 
NewRowGroup(); }
 
   /// \brief Write ColumnChunk in row group using an array.
   virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index d6aebd8284..71e93ce0a4 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -556,7 +556,7 @@ cdef extern from "parquet/arrow/writer.h" namespace 
"parquet::arrow" nogil:
                                              const 
shared_ptr[ArrowWriterProperties]& arrow_properties)
 
         CStatus WriteTable(const CTable& table, int64_t chunk_size)
-        CStatus NewRowGroup(int64_t chunk_size)
+        CStatus NewRowGroup()
         CStatus Close()
         CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& 
key_value_metadata)
 

Reply via email to