This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new dafc970e88 GH-44007: [GLib][Parquet] Add `gparquet_arrow_file_writer_new_buffered_row_group()` (#44100)
dafc970e88 is described below

commit dafc970e883091d2b81c232eca4b842846dea408
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sun Sep 15 20:31:42 2024 +0900

    GH-44007: [GLib][Parquet] Add `gparquet_arrow_file_writer_new_buffered_row_group()` (#44100)
    
    ### Rationale for this change
    
    It's useful for advanced use.
    
    ### What changes are included in this PR?
    
    Add `gparquet_arrow_file_writer_new_buffered_row_group()`.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #44007
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/parquet-glib/arrow-file-writer.cpp     | 38 +++++++++++++++++++++++++++
 c_glib/parquet-glib/arrow-file-writer.h       |  5 ++++
 c_glib/test/parquet/test-arrow-file-writer.rb |  7 +++--
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp 
b/c_glib/parquet-glib/arrow-file-writer.cpp
index 7a672f1f21..2b8e2bdeac 100644
--- a/c_glib/parquet-glib/arrow-file-writer.cpp
+++ b/c_glib/parquet-glib/arrow-file-writer.cpp
@@ -517,6 +517,19 @@ 
gparquet_arrow_file_writer_get_schema(GParquetArrowFileWriter *writer)
  * @record_batch: A record batch to be written.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
+ * Write a record batch into the buffered row group.
+ *
+ * Multiple record batches can be written into the same row group
+ * through this function.
+ *
+ * gparquet_writer_properties_get_max_row_group_length() is respected
+ * and a new row group will be created if the current row group
+ * exceeds the limit.
+ *
+ * Record batches get flushed to the output stream once
+ * gparquet_arrow_file_writer_new_buffered_row_group() or
+ * gparquet_arrow_file_writer_close() is called.
+ *
  * Returns: %TRUE on success, %FALSE if there was an error.
  *
  * Since: 18.0.0
@@ -564,6 +577,8 @@ 
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
  * @chunk_size: The max number of rows in a row group.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
+ * Start a new row group.
+ *
  * Returns: %TRUE on success, %FALSE if there was an error.
  *
  * Since: 18.0.0
@@ -579,12 +594,35 @@ 
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
                        "[parquet][arrow][file-writer][new-row-group]");
 }
 
+/**
+ * gparquet_arrow_file_writer_new_buffered_row_group:
+ * @writer: A #GParquetArrowFileWriter.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Start a new buffered row group.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gparquet_arrow_file_writer_new_buffered_row_group(GParquetArrowFileWriter 
*writer,
+                                                  GError **error)
+{
+  auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
+  return garrow::check(error,
+                       parquet_arrow_file_writer->NewBufferedRowGroup(),
+                       
"[parquet][arrow][file-writer][new-buffered-row-group]");
+}
+
 /**
  * gparquet_arrow_file_writer_write_chunked_array:
  * @writer: A #GParquetArrowFileWriter.
  * @chunked_array: A #GArrowChunkedArray to be written.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
+ * Write a chunked array as a column chunk.
+ *
  * Returns: %TRUE on success, %FALSE if there was an error.
  *
  * Since: 18.0.0
diff --git a/c_glib/parquet-glib/arrow-file-writer.h 
b/c_glib/parquet-glib/arrow-file-writer.h
index 40595bdfef..2c82f7c1f8 100644
--- a/c_glib/parquet-glib/arrow-file-writer.h
+++ b/c_glib/parquet-glib/arrow-file-writer.h
@@ -139,6 +139,11 @@ 
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
                                          gsize chunk_size,
                                          GError **error);
 
+GPARQUET_AVAILABLE_IN_18_0
+gboolean
+gparquet_arrow_file_writer_new_buffered_row_group(GParquetArrowFileWriter 
*writer,
+                                                  GError **error);
+
 GPARQUET_AVAILABLE_IN_18_0
 gboolean
 gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer,
diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb 
b/c_glib/test/parquet/test-arrow-file-writer.rb
index 89db16c6fb..d8344bf1c5 100644
--- a/c_glib/test/parquet/test-arrow-file-writer.rb
+++ b/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -40,14 +40,17 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase
 
     writer = Parquet::ArrowFileWriter.new(record_batch.schema, @file.path)
     writer.write_record_batch(record_batch)
+    writer.new_buffered_row_group
+    writer.write_record_batch(record_batch)
     writer.close
 
     reader = Parquet::ArrowFileReader.new(@file.path)
     begin
       reader.use_threads = true
       assert_equal([
-                     1,
-                     Arrow::Table.new(record_batch.schema, [record_batch]),
+                     2,
+                     Arrow::Table.new(record_batch.schema,
+                                      [record_batch, record_batch]),
                    ],
                    [
                      reader.n_row_groups,

Reply via email to