This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 34af77ef00 GH-33750: [GLib] Add garrow_table_batch_reader_set_max_chunk_size() (#34601)
34af77ef00 is described below

commit 34af77ef0087b933c12a4616c9179cf298627dbd
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sat Mar 18 12:26:23 2023 +0900

    GH-33750: [GLib] Add garrow_table_batch_reader_set_max_chunk_size() (#34601)
    
    ### Rationale for this change
    
    This is a missing feature.
    
    ### What changes are included in this PR?
    
    This adds a binding.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * Closes: #33750
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/reader.cpp           | 27 +++++++++++++++++++++++++++
 c_glib/arrow-glib/reader.h             |  5 +++++
 c_glib/arrow-glib/reader.hpp           |  6 +++++-
 c_glib/test/test-table-batch-reader.rb | 12 ++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp
index 1d7bb5ab1a..16532361d3 100644
--- a/c_glib/arrow-glib/reader.cpp
+++ b/c_glib/arrow-glib/reader.cpp
@@ -386,6 +386,26 @@ garrow_table_batch_reader_new(GArrowTable *table)
   return garrow_table_batch_reader_new_raw(&arrow_table_batch_reader);
 }
 
+/**
+ * garrow_table_batch_reader_set_max_chunk_size:
+ * @reader: A #GArrowTableBatchReader.
+ * @max_chunk_size: The maximum chunk size of record batches.
+ *
+ * Set the desired maximum chunk size of record batches.
+ *
+ * The actual chunk size of each record batch may be smaller,
+ * depending on actual chunking characteristics of each table column.
+ *
+ * Since: 12.0.0
+ */
+void
+garrow_table_batch_reader_set_max_chunk_size(GArrowTableBatchReader *reader,
+                                             gint64 max_chunk_size)
+{
+  auto arrow_reader = garrow_table_batch_reader_get_raw(reader);
+  arrow_reader->set_chunksize(max_chunk_size);
+}
+
 
 G_DEFINE_TYPE(GArrowRecordBatchStreamReader,
               garrow_record_batch_stream_reader,
@@ -2238,6 +2258,13 @@ garrow_table_batch_reader_new_raw(std::shared_ptr<arrow::TableBatchReader> *arro
   return reader;
 }
 
+std::shared_ptr<arrow::TableBatchReader>
+garrow_table_batch_reader_get_raw(GArrowTableBatchReader *reader)
+{
+  return std::static_pointer_cast<arrow::TableBatchReader>(
+    garrow_record_batch_reader_get_raw(GARROW_RECORD_BATCH_READER(reader)));
+}
+
 GArrowRecordBatchStreamReader *
 garrow_record_batch_stream_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchStreamReader> *arrow_reader)
 {
diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h
index 1bdd322e60..87171913cb 100644
--- a/c_glib/arrow-glib/reader.h
+++ b/c_glib/arrow-glib/reader.h
@@ -92,6 +92,11 @@ struct _GArrowTableBatchReaderClass
 
 GArrowTableBatchReader *garrow_table_batch_reader_new(GArrowTable *table);
 
+GARROW_AVAILABLE_IN_12_0
+void
+garrow_table_batch_reader_set_max_chunk_size(GArrowTableBatchReader *reader,
+                                             gint64 max_chunk_size);
+
 
 #define GARROW_TYPE_RECORD_BATCH_STREAM_READER          \
   (garrow_record_batch_stream_reader_get_type())
diff --git a/c_glib/arrow-glib/reader.hpp b/c_glib/arrow-glib/reader.hpp
index c7b2b76f21..8e9a7ea67b 100644
--- a/c_glib/arrow-glib/reader.hpp
+++ b/c_glib/arrow-glib/reader.hpp
@@ -30,7 +30,11 @@
 GArrowRecordBatchReader *garrow_record_batch_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchReader> *arrow_reader);
 std::shared_ptr<arrow::ipc::RecordBatchReader> garrow_record_batch_reader_get_raw(GArrowRecordBatchReader *reader);
 
-GArrowTableBatchReader *garrow_table_batch_reader_new_raw(std::shared_ptr<arrow::TableBatchReader> *arrow_reader);
+GArrowTableBatchReader *
+garrow_table_batch_reader_new_raw(
+  std::shared_ptr<arrow::TableBatchReader> *arrow_reader);
+std::shared_ptr<arrow::TableBatchReader>
+garrow_table_batch_reader_get_raw(GArrowTableBatchReader *reader);
 
 GArrowRecordBatchStreamReader *garrow_record_batch_stream_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchStreamReader> *arrow_reader);
 
diff --git a/c_glib/test/test-table-batch-reader.rb b/c_glib/test/test-table-batch-reader.rb
index b161c8a558..3b00b9067a 100644
--- a/c_glib/test/test-table-batch-reader.rb
+++ b/c_glib/test/test-table-batch-reader.rb
@@ -39,4 +39,16 @@ class TestTableBatchReader < Test::Unit::TestCase
     reader = Arrow::TableBatchReader.new(table)
     assert_equal(table.schema, reader.schema)
   end
+
+  def test_max_chunk_size
+    array = build_int32_array([1, 2, 3])
+    table = build_table("number" => array)
+    reader = Arrow::TableBatchReader.new(table)
+    reader.max_chunk_size = 2
+    assert_equal(build_record_batch("number" => build_int32_array([1, 2])),
+                 reader.read_next)
+    assert_equal(build_record_batch("number" => build_int32_array([3])),
+                 reader.read_next)
+    assert_nil(reader.read_next)
+  end
 end

Reply via email to