This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 8556001e6a GH-44006: [GLib][Parquet] Add
`gparquet_arrow_file_writer_new_row_group()` (#44039)
8556001e6a is described below
commit 8556001e6a8b4c7f35d4e18c28704d7811005904
Author: Sutou Kouhei <[email protected]>
AuthorDate: Wed Sep 11 11:02:26 2024 +0900
GH-44006: [GLib][Parquet] Add `gparquet_arrow_file_writer_new_row_group()`
(#44039)
### Rationale for this change
This is a low-level API that lets callers control how data is written, for
example by starting a new row group explicitly. It is intended for advanced
users.
### What changes are included in this PR?
`gparquet_arrow_file_writer_write_chunked_array()` is also added so that
`gparquet_arrow_file_writer_new_row_group()` can be exercised in a test.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
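Yes. `gparquet_arrow_file_writer_new_row_group()` and
`gparquet_arrow_file_writer_write_chunked_array()` are new public functions,
and the `chunk_size` parameter of `gparquet_arrow_file_writer_write_table()`
changes from `guint64` to `gsize`.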
* GitHub Issue: #44006
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/parquet-glib/arrow-file-writer.cpp | 50 +++++++++++++++++++++++++--
c_glib/parquet-glib/arrow-file-writer.h | 14 +++++++-
c_glib/test/parquet/test-arrow-file-writer.rb | 30 ++++++++++++++++
3 files changed, 90 insertions(+), 4 deletions(-)
diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp
index 0d0e87e7e3..7a672f1f21 100644
--- a/c_glib/parquet-glib/arrow-file-writer.cpp
+++ b/c_glib/parquet-glib/arrow-file-writer.cpp
@@ -548,13 +548,57 @@ gparquet_arrow_file_writer_write_record_batch(GParquetArrowFileWriter *writer,
gboolean
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
GArrowTable *table,
- guint64 chunk_size,
+ gsize chunk_size,
GError **error)
{
auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
auto arrow_table = garrow_table_get_raw(table).get();
- auto status = parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size);
- return garrow_error_check(error, status, "[parquet][arrow][file-writer][write-table]");
+ return garrow::check(error,
+ parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size),
+ "[parquet][arrow][file-writer][write-table]");
+}
+
+/**
+ * gparquet_arrow_file_writer_new_row_group:
+ * @writer: A #GParquetArrowFileWriter.
+ * @chunk_size: The max number of rows in a row group.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
+ gsize chunk_size,
+ GError **error)
+{
+ auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
+ return garrow::check(error,
+ parquet_arrow_file_writer->NewRowGroup(chunk_size),
+ "[parquet][arrow][file-writer][new-row-group]");
+}
+
+/**
+ * gparquet_arrow_file_writer_write_chunked_array:
+ * @writer: A #GParquetArrowFileWriter.
+ * @chunked_array: A #GArrowChunkedArray to be written.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer,
+ GArrowChunkedArray *chunked_array,
+ GError **error)
+{
+ auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
+ auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array);
+ return garrow::check(error,
+ parquet_arrow_file_writer->WriteColumnChunk(arrow_chunked_array),
+ "[parquet][arrow][file-writer][write-chunked-array]");
}
/**
diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h
index 7eb14fe27a..40595bdfef 100644
--- a/c_glib/parquet-glib/arrow-file-writer.h
+++ b/c_glib/parquet-glib/arrow-file-writer.h
@@ -130,9 +130,21 @@ GPARQUET_AVAILABLE_IN_0_11
gboolean
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
GArrowTable *table,
- guint64 chunk_size,
+ gsize chunk_size,
GError **error);
+GPARQUET_AVAILABLE_IN_18_0
+gboolean
+gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
+ gsize chunk_size,
+ GError **error);
+
+GPARQUET_AVAILABLE_IN_18_0
+gboolean
+gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer,
+ GArrowChunkedArray *chunked_array,
+ GError **error);
+
GPARQUET_AVAILABLE_IN_0_11
gboolean
gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError **error);
diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb
index e348c9b679..89db16c6fb 100644
--- a/c_glib/test/parquet/test-arrow-file-writer.rb
+++ b/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -82,4 +82,34 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase
reader.unref
end
end
+
+ def test_write_chunked_array
+ schema = build_schema("enabled" => :boolean)
+ writer = Parquet::ArrowFileWriter.new(schema, @file.path)
+ writer.new_row_group(2)
+ chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])])
+ writer.write_chunked_array(chunked_array)
+ writer.new_row_group(1)
+ chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])])
+ writer.write_chunked_array(chunked_array)
+ writer.close
+
+ reader = Parquet::ArrowFileReader.new(@file.path)
+ begin
+ reader.use_threads = true
+ assert_equal([
+ 2,
+ build_table("enabled" => [
+ build_boolean_array([true, nil]),
+ build_boolean_array([false]),
+ ]),
+ ],
+ [
+ reader.n_row_groups,
+ reader.read_table,
+ ])
+ ensure
+ reader.unref
+ end
+ end
end