This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new dafc970e88 GH-44007: [GLib][Parquet] Add
`gparquet_arrow_file_writer_new_buffered_row_group()` (#44100)
dafc970e88 is described below
commit dafc970e883091d2b81c232eca4b842846dea408
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sun Sep 15 20:31:42 2024 +0900
GH-44007: [GLib][Parquet] Add
`gparquet_arrow_file_writer_new_buffered_row_group()` (#44100)
### Rationale for this change
It's useful for advanced use.
### What changes are included in this PR?
Add `gparquet_arrow_file_writer_new_buffered_row_group()`.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #44007
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/parquet-glib/arrow-file-writer.cpp | 38 +++++++++++++++++++++++++++
c_glib/parquet-glib/arrow-file-writer.h | 5 ++++
c_glib/test/parquet/test-arrow-file-writer.rb | 7 +++--
3 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp
b/c_glib/parquet-glib/arrow-file-writer.cpp
index 7a672f1f21..2b8e2bdeac 100644
--- a/c_glib/parquet-glib/arrow-file-writer.cpp
+++ b/c_glib/parquet-glib/arrow-file-writer.cpp
@@ -517,6 +517,19 @@
gparquet_arrow_file_writer_get_schema(GParquetArrowFileWriter *writer)
* @record_batch: A record batch to be written.
* @error: (nullable): Return location for a #GError or %NULL.
*
+ * Write a record batch into the buffered row group.
+ *
+ * Multiple record batches can be written into the same row group
+ * through this function.
+ *
+ * gparquet_writer_properties_get_max_row_group_length() is respected
+ * and a new row group will be created if the current row group
+ * exceeds the limit.
+ *
+ * Record batches get flushed to the output stream once
+ * gparquet_arrow_file_writer_new_buffered_row_group() or
+ * gparquet_arrow_file_writer_close() is called.
+ *
* Returns: %TRUE on success, %FALSE if there was an error.
*
* Since: 18.0.0
@@ -564,6 +577,8 @@
gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer,
* @chunk_size: The max number of rows in a row group.
* @error: (nullable): Return location for a #GError or %NULL.
*
+ * Start a new row group.
+ *
* Returns: %TRUE on success, %FALSE if there was an error.
*
* Since: 18.0.0
@@ -579,12 +594,35 @@
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
"[parquet][arrow][file-writer][new-row-group]");
}
+/**
+ * gparquet_arrow_file_writer_new_buffered_row_group:
+ * @writer: A #GParquetArrowFileWriter.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Start a new buffered row group.
+ *
+ * Returns: %TRUE on success, %FALSE if there was an error.
+ *
+ * Since: 18.0.0
+ */
+gboolean
+gparquet_arrow_file_writer_new_buffered_row_group(GParquetArrowFileWriter *writer,
+ GError **error)
+{
+ auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer);
+ return garrow::check(error,
+ parquet_arrow_file_writer->NewBufferedRowGroup(),
+ "[parquet][arrow][file-writer][new-buffered-row-group]");
+}
+
/**
* gparquet_arrow_file_writer_write_chunked_array:
* @writer: A #GParquetArrowFileWriter.
* @chunked_array: A #GArrowChunkedArray to be written.
* @error: (nullable): Return location for a #GError or %NULL.
*
+ * Write a chunked array as a column chunk.
+ *
* Returns: %TRUE on success, %FALSE if there was an error.
*
* Since: 18.0.0
diff --git a/c_glib/parquet-glib/arrow-file-writer.h
b/c_glib/parquet-glib/arrow-file-writer.h
index 40595bdfef..2c82f7c1f8 100644
--- a/c_glib/parquet-glib/arrow-file-writer.h
+++ b/c_glib/parquet-glib/arrow-file-writer.h
@@ -139,6 +139,11 @@
gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer,
gsize chunk_size,
GError **error);
+GPARQUET_AVAILABLE_IN_18_0
+gboolean
+gparquet_arrow_file_writer_new_buffered_row_group(GParquetArrowFileWriter *writer,
+ GError **error);
+
GPARQUET_AVAILABLE_IN_18_0
gboolean
gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer,
diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb
b/c_glib/test/parquet/test-arrow-file-writer.rb
index 89db16c6fb..d8344bf1c5 100644
--- a/c_glib/test/parquet/test-arrow-file-writer.rb
+++ b/c_glib/test/parquet/test-arrow-file-writer.rb
@@ -40,14 +40,17 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase
writer = Parquet::ArrowFileWriter.new(record_batch.schema, @file.path)
writer.write_record_batch(record_batch)
+ writer.new_buffered_row_group
+ writer.write_record_batch(record_batch)
writer.close
reader = Parquet::ArrowFileReader.new(@file.path)
begin
reader.use_threads = true
assert_equal([
- 1,
- Arrow::Table.new(record_batch.schema, [record_batch]),
+ 2,
+ Arrow::Table.new(record_batch.schema,
+ [record_batch, record_batch]),
],
[
reader.n_row_groups,