This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b5d36e9f4d GH-36819: [R] Use RunWithCapturedR for reading Parquet
files (#37274)
b5d36e9f4d is described below
commit b5d36e9f4d323f274e06ed04285ddd4e2121ab19
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Sep 5 10:12:00 2023 -0300
GH-36819: [R] Use RunWithCapturedR for reading Parquet files (#37274)
### Rationale for this change
When we first added RunWithCapturedR to support reading files from R
connections, none of the Parquet tests seemed to call R from another thread.
Because RunWithCapturedR comes with some complexity, I didn't add it anywhere
it wasn't strictly needed. A recent StackOverflow post exposed that reading
very large parquet files do use multiple threads and thus need RunWithCapturedR.
### What changes are included in this PR?
The two most common calls to read a parquet in which a user might trigger
this failure are now wrapped in RunWithCapturedR.
### Are these changes tested?
The changes are tested in the current suite.
### Are there any user-facing changes?
No.
* Closes: #36819
Lead-authored-by: Dewey Dunnington <[email protected]>
Co-authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
r/src/parquet.cpp | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
index f9f7b0447c..c32d064059 100644
--- a/r/src/parquet.cpp
+++ b/r/src/parquet.cpp
@@ -17,6 +17,8 @@
#include "./arrow_types.h"
+#include "./safe-call-into-r.h"
+
#if defined(ARROW_R_WITH_PARQUET)
#include <arrow/table.h>
@@ -129,7 +131,10 @@ std::shared_ptr<parquet::arrow::FileReader>
parquet___arrow___FileReader__OpenFi
std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable1(
const std::shared_ptr<parquet::arrow::FileReader>& reader) {
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
+ auto result =
+ RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadTable(&table);
});
+
+ StopIfNotOk(result);
return table;
}
@@ -138,7 +143,10 @@ std::shared_ptr<arrow::Table>
parquet___arrow___FileReader__ReadTable2(
const std::shared_ptr<parquet::arrow::FileReader>& reader,
const std::vector<int>& column_indices) {
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadTable(column_indices, &table));
+ auto result = RunWithCapturedRIfPossibleVoid(
+ [&]() { return reader->ReadTable(column_indices, &table); });
+
+ StopIfNotOk(result);
return table;
}
@@ -146,7 +154,10 @@ std::shared_ptr<arrow::Table>
parquet___arrow___FileReader__ReadTable2(
std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadRowGroup1(
const std::shared_ptr<parquet::arrow::FileReader>& reader, int i) {
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadRowGroup(i, &table));
+ auto result =
+ RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadRowGroup(i,
&table); });
+
+ StopIfNotOk(result);
return table;
}
@@ -155,7 +166,10 @@ std::shared_ptr<arrow::Table>
parquet___arrow___FileReader__ReadRowGroup2(
const std::shared_ptr<parquet::arrow::FileReader>& reader, int i,
const std::vector<int>& column_indices) {
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadRowGroup(i, column_indices, &table));
+ auto result = RunWithCapturedRIfPossibleVoid(
+ [&]() { return reader->ReadRowGroup(i, column_indices, &table); });
+
+ StopIfNotOk(result);
return table;
}
@@ -164,7 +178,10 @@ std::shared_ptr<arrow::Table>
parquet___arrow___FileReader__ReadRowGroups1(
const std::shared_ptr<parquet::arrow::FileReader>& reader,
const std::vector<int>& row_groups) {
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadRowGroups(row_groups, &table));
+ auto result = RunWithCapturedRIfPossibleVoid(
+ [&]() { return reader->ReadRowGroups(row_groups, &table); });
+
+ StopIfNotOk(result);
return table;
}
@@ -173,7 +190,10 @@ std::shared_ptr<arrow::Table>
parquet___arrow___FileReader__ReadRowGroups2(
const std::shared_ptr<parquet::arrow::FileReader>& reader,
const std::vector<int>& row_groups, const std::vector<int>&
column_indices) {
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadRowGroups(row_groups, column_indices,
&table));
+ auto result = RunWithCapturedRIfPossibleVoid(
+ [&]() { return reader->ReadRowGroups(row_groups, column_indices,
&table); });
+
+ StopIfNotOk(result);
return table;
}
@@ -199,7 +219,10 @@ int parquet___arrow___FileReader__num_row_groups(
std::shared_ptr<arrow::ChunkedArray> parquet___arrow___FileReader__ReadColumn(
const std::shared_ptr<parquet::arrow::FileReader>& reader, int i) {
std::shared_ptr<arrow::ChunkedArray> array;
- PARQUET_THROW_NOT_OK(reader->ReadColumn(i - 1, &array));
+ auto result =
+ RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadColumn(i - 1,
&array); });
+
+ StopIfNotOk(result);
return array;
}