This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8556001e6a GH-44006: [GLib][Parquet] Add `gparquet_arrow_file_writer_new_row_group()` (#44039)
8556001e6a is described below

commit 8556001e6a8b4c7f35d4e18c28704d7811005904
Author: Sutou Kouhei <[email protected]>
AuthorDate: Wed Sep 11 11:02:26 2024 +0900

    GH-44006: [GLib][Parquet] Add `gparquet_arrow_file_writer_new_row_group()` (#44039)
    
    ### Rationale for this change
    
    This is a low-level API to control how to write data. This is for advanced users.
    
    ### What changes are included in this PR?
    
    `gparquet_arrow_file_writer_write_chunked_array()` is also added to write a test for `gparquet_arrow_file_writer_new_row_group()`.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #44006
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/parquet-glib/arrow-file-writer.cpp     | 50 +++++++++++++++++++++++++--
 c_glib/parquet-glib/arrow-file-writer.h       | 14 +++++++-
 c_glib/test/parquet/test-arrow-file-writer.rb | 30 ++++++++++++++++
 3 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp
index 0d0e87e7e3..7a672f1f21 100644
--- a/c_glib/parquet-glib/arrow-file-writer.cpp
+++ b/c_glib/parquet-glib/arrow-file-writer.cpp
@@ -548,13 +548,57 @@ gparquet_arrow_file_writer_write_record_batch(GParquetArrowFileWriter *writer,
 gboolean
 gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
                                        GArrowTable *table,
-                                       guint64 chunk_size,
+                                       gsize chunk_size,
                                        GError **error)
 {
   auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
   auto arrow_table = garrow_table_get_raw(table).get();
-  auto status = parquet_arrow_file_writer->WriteTable(*arrow_table, 
chunk_size);
-  return garrow_error_check(error, status, 
"[parquet][arrow][file-writer][write-table]");
+  return garrow::check(error,
+                       parquet_arrow_file_writer->WriteTable(*arrow_table, 
chunk_size),
+                       "[parquet][arrow][file-writer][write-table]");
+}
+
+/**
+ * gparquet_arrow_file_writer_new_row_group:
+ * @writer: A #GParquetArrowFileWriter.
+ * @chunk_size: The max number of rows in a row group.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
+                                         gsize chunk_size,
+                                         GError **error)
+{
+  auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
+  return garrow::check(error,
+                       parquet_arrow_file_writer->NewRowGroup(chunk_size),
+                       "[parquet][arrow][file-writer][new-row-group]");
+}
+
+/**
+ * gparquet_arrow_file_writer_write_chunked_array:
+ * @writer: A #GParquetArrowFileWriter.
+ * @chunked_array: A #GArrowChunkedArray to be written.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer,
+                                               GArrowChunkedArray 
*chunked_array,
+                                               GError **error)
+{
+  auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
+  auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array);
+  return garrow::check(error,
+                       
parquet_arrow_file_writer->WriteColumnChunk(arrow_chunked_array),
+                       "[parquet][arrow][file-writer][write-chunked-array]");
 }
 
 /**
diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h
index 7eb14fe27a..40595bdfef 100644
--- a/c_glib/parquet-glib/arrow-file-writer.h
+++ b/c_glib/parquet-glib/arrow-file-writer.h
@@ -130,9 +130,21 @@ GPARQUET_AVAILABLE_IN_0_11
 gboolean
 gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
                                        GArrowTable *table,
-                                       guint64 chunk_size,
+                                       gsize chunk_size,
                                        GError **error);
 
+GPARQUET_AVAILABLE_IN_18_0
+gboolean
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
+                                         gsize chunk_size,
+                                         GError **error);
+
+GPARQUET_AVAILABLE_IN_18_0
+gboolean
+gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer,
+                                               GArrowChunkedArray 
*chunked_array,
+                                               GError **error);
+
 GPARQUET_AVAILABLE_IN_0_11
 gboolean
 gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError 
**error);
diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb
index e348c9b679..89db16c6fb 100644
--- a/c_glib/test/parquet/test-arrow-file-writer.rb
+++ b/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -82,4 +82,34 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase
       reader.unref
     end
   end
+
+  def test_write_chunked_array
+    schema = build_schema("enabled" => :boolean)
+    writer = Parquet::ArrowFileWriter.new(schema, @file.path)
+    writer.new_row_group(2)
+    chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])])
+    writer.write_chunked_array(chunked_array)
+    writer.new_row_group(1)
+    chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])])
+    writer.write_chunked_array(chunked_array)
+    writer.close
+
+    reader = Parquet::ArrowFileReader.new(@file.path)
+    begin
+      reader.use_threads = true
+      assert_equal([
+                     2,
+                     build_table("enabled" => [
+                                   build_boolean_array([true, nil]),
+                                   build_boolean_array([false]),
+                                 ]),
+                   ],
+                   [
+                     reader.n_row_groups,
+                     reader.read_table,
+                   ])
+    ensure
+      reader.unref
+    end
+  end
 end

Reply via email to