kou commented on code in PR #41750:
URL: https://github.com/apache/arrow/pull/41750#discussion_r1614318431
##########
c_glib/arrow-dataset-glib/dataset.cpp:
##########
@@ -152,12 +153,45 @@ gadataset_dataset_to_table(GADatasetDataset *dataset,
GError **error)
}
auto arrow_scanner = *arrow_scanner_result;
auto arrow_table_result = arrow_scanner->ToTable();
- if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) {
+ if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) {
return NULL;
}
return garrow_table_new_raw(&(*arrow_table_result));
}
+/**
+ * gadataset_dataset_to_record_batch_reader:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError
**error)
+{
+ auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+ auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+ if (!garrow::check(error,
+ arrow_scanner_builder_result,
+ "[dataset][to-record-batch-reader]")) {
+ return NULL;
Review Comment:
Could you use `nullptr` in newly written code?
```suggestion
return nullptr;
```
##########
c_glib/arrow-dataset-glib/scanner.cpp:
##########
@@ -128,6 +128,27 @@ gadataset_scanner_to_table(GADatasetScanner *scanner,
GError **error)
}
}
+/**
+ * gadataset_scanner_to_record_batch_reader:
+ * @scanner: A #GADatasetScanner.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError
**error)
+{
+ auto arrow_scanner = gadataset_scanner_get_raw(scanner);
+ auto arrow_reader_result = arrow_scanner->ToRecordBatchReader();
+ if (!garrow::check(error, arrow_reader_result,
"[scanner][to-record-batch-reader]")) {
+ return NULL;
+ }
+ return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), nullptr);
Review Comment:
```suggestion
auto sources = g_list_prepend(nullptr, scanner);
return garrow_record_batch_reader_new_raw(&(*arrow_reader_result),
sources);
```
##########
c_glib/arrow-dataset-glib/dataset.cpp:
##########
@@ -152,12 +153,45 @@ gadataset_dataset_to_table(GADatasetDataset *dataset,
GError **error)
}
auto arrow_scanner = *arrow_scanner_result;
auto arrow_table_result = arrow_scanner->ToTable();
- if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) {
+ if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) {
return NULL;
}
return garrow_table_new_raw(&(*arrow_table_result));
}
+/**
+ * gadataset_dataset_to_record_batch_reader:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError
**error)
+{
+ auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+ auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+ if (!garrow::check(error,
+ arrow_scanner_builder_result,
+ "[dataset][to-record-batch-reader]")) {
+ return NULL;
+ }
+ auto arrow_scanner_builder = *arrow_scanner_builder_result;
+ auto arrow_scanner_result = arrow_scanner_builder->Finish();
+ if (!garrow::check(error, arrow_scanner_result,
"[dataset][to-record-batch-reader]")) {
+ return NULL;
+ }
+ auto arrow_scanner = *arrow_scanner_result;
+ auto arrow_reader_result = arrow_scanner->ToRecordBatchReader();
+ if (!garrow::check(error, arrow_reader_result,
"[dataset][to-record-batch-reader]")) {
+ return NULL;
Review Comment:
```suggestion
return nullptr;
```
##########
c_glib/arrow-dataset-glib/scanner.cpp:
##########
@@ -128,6 +128,27 @@ gadataset_scanner_to_table(GADatasetScanner *scanner,
GError **error)
}
}
+/**
+ * gadataset_scanner_to_record_batch_reader:
+ * @scanner: A #GADatasetScanner.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError
**error)
+{
+ auto arrow_scanner = gadataset_scanner_get_raw(scanner);
+ auto arrow_reader_result = arrow_scanner->ToRecordBatchReader();
+ if (!garrow::check(error, arrow_reader_result,
"[scanner][to-record-batch-reader]")) {
+ return NULL;
Review Comment:
```suggestion
return nullptr;
```
##########
c_glib/arrow-dataset-glib/dataset.cpp:
##########
@@ -152,12 +153,45 @@ gadataset_dataset_to_table(GADatasetDataset *dataset,
GError **error)
}
auto arrow_scanner = *arrow_scanner_result;
auto arrow_table_result = arrow_scanner->ToTable();
- if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) {
+ if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) {
return NULL;
}
return garrow_table_new_raw(&(*arrow_table_result));
}
+/**
+ * gadataset_dataset_to_record_batch_reader:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError
**error)
+{
+ auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+ auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+ if (!garrow::check(error,
+ arrow_scanner_builder_result,
+ "[dataset][to-record-batch-reader]")) {
+ return NULL;
+ }
+ auto arrow_scanner_builder = *arrow_scanner_builder_result;
+ auto arrow_scanner_result = arrow_scanner_builder->Finish();
+ if (!garrow::check(error, arrow_scanner_result,
"[dataset][to-record-batch-reader]")) {
+ return NULL;
Review Comment:
```suggestion
return nullptr;
```
##########
c_glib/test/dataset/test-file-system-dataset.rb:
##########
@@ -56,6 +65,17 @@ def test_partitioning
end
def test_read_write
+ dataset, expected_table = create_dataset
+ assert_equal(expected_table, dataset.to_table)
+ end
+
+ def test_record_batch_reader
Review Comment:
```suggestion
def test_to_record_batch_reader
```
##########
c_glib/arrow-dataset-glib/dataset.cpp:
##########
@@ -152,12 +153,45 @@ gadataset_dataset_to_table(GADatasetDataset *dataset,
GError **error)
}
auto arrow_scanner = *arrow_scanner_result;
auto arrow_table_result = arrow_scanner->ToTable();
- if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) {
+ if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) {
return NULL;
}
return garrow_table_new_raw(&(*arrow_table_result));
}
+/**
+ * gadataset_dataset_to_record_batch_reader:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ * A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError
**error)
+{
+ auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+ auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+ if (!garrow::check(error,
+ arrow_scanner_builder_result,
+ "[dataset][to-record-batch-reader]")) {
+ return NULL;
+ }
+ auto arrow_scanner_builder = *arrow_scanner_builder_result;
+ auto arrow_scanner_result = arrow_scanner_builder->Finish();
+ if (!garrow::check(error, arrow_scanner_result,
"[dataset][to-record-batch-reader]")) {
+ return NULL;
+ }
+ auto arrow_scanner = *arrow_scanner_result;
+ auto arrow_reader_result = arrow_scanner->ToRecordBatchReader();
+ if (!garrow::check(error, arrow_reader_result,
"[dataset][to-record-batch-reader]")) {
+ return NULL;
+ }
+ return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), nullptr);
Review Comment:
Could you refer `dataset` to keep alive while the reader is alive?
```suggestion
auto sources = g_list_prepend(nullptr, dataset);
return garrow_record_batch_reader_new_raw(&(*arrow_reader_result),
sources);
```
##########
c_glib/test/dataset/test-scanner.rb:
##########
@@ -39,10 +40,22 @@ def setup
builder = @dataset.begin_scan
@scanner = builder.finish
yield
+ ensure
+ # We have to ignore errors trying to remove the directory due to
+ # the RecordBatchReader not closing files
+ # (https://github.com/apache/arrow/issues/41771).
+ # Also request GC first which should free any remaining RecordBatchReader
+ # and close open files.
+ GC.start
+ FileUtils.remove_entry(tmpdir, force: true)
end
end
def test_to_table
assert_equal(@table, @scanner.to_table)
end
+
+ def test_record_batch_reader
Review Comment:
```suggestion
def test_to_record_batch_reader
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]