This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b5d36e9f4d GH-36819: [R] Use RunWithCapturedR for reading Parquet 
files (#37274)
b5d36e9f4d is described below

commit b5d36e9f4d323f274e06ed04285ddd4e2121ab19
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Sep 5 10:12:00 2023 -0300

    GH-36819: [R] Use RunWithCapturedR for reading Parquet files (#37274)
    
    ### Rationale for this change
    
    When we first added RunWithCapturedR to support reading files from R 
connections, none of the Parquet tests seemed to call R from another thread. 
Because RunWithCapturedR comes with some complexity, I didn't add it anywhere 
it wasn't strictly needed. A recent StackOverflow post exposed that reading 
very large parquet files do use multiple threads and thus need RunWithCapturedR.
    
    ### What changes are included in this PR?
    
    The two most common calls to read a parquet in which a user might trigger 
this failure are now wrapped in RunWithCapturedR.
    
    ### Are these changes tested?
    
    The changes are tested in the current suite.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #36819
    
    Lead-authored-by: Dewey Dunnington <[email protected]>
    Co-authored-by: Dewey Dunnington <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 r/src/parquet.cpp | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
index f9f7b0447c..c32d064059 100644
--- a/r/src/parquet.cpp
+++ b/r/src/parquet.cpp
@@ -17,6 +17,8 @@
 
 #include "./arrow_types.h"
 
+#include "./safe-call-into-r.h"
+
 #if defined(ARROW_R_WITH_PARQUET)
 
 #include <arrow/table.h>
@@ -129,7 +131,10 @@ std::shared_ptr<parquet::arrow::FileReader> 
parquet___arrow___FileReader__OpenFi
 std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable1(
     const std::shared_ptr<parquet::arrow::FileReader>& reader) {
   std::shared_ptr<arrow::Table> table;
-  PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
+  auto result =
+      RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadTable(&table); 
});
+
+  StopIfNotOk(result);
   return table;
 }
 
@@ -138,7 +143,10 @@ std::shared_ptr<arrow::Table> 
parquet___arrow___FileReader__ReadTable2(
     const std::shared_ptr<parquet::arrow::FileReader>& reader,
     const std::vector<int>& column_indices) {
   std::shared_ptr<arrow::Table> table;
-  PARQUET_THROW_NOT_OK(reader->ReadTable(column_indices, &table));
+  auto result = RunWithCapturedRIfPossibleVoid(
+      [&]() { return reader->ReadTable(column_indices, &table); });
+
+  StopIfNotOk(result);
   return table;
 }
 
@@ -146,7 +154,10 @@ std::shared_ptr<arrow::Table> 
parquet___arrow___FileReader__ReadTable2(
 std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadRowGroup1(
     const std::shared_ptr<parquet::arrow::FileReader>& reader, int i) {
   std::shared_ptr<arrow::Table> table;
-  PARQUET_THROW_NOT_OK(reader->ReadRowGroup(i, &table));
+  auto result =
+      RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadRowGroup(i, 
&table); });
+
+  StopIfNotOk(result);
   return table;
 }
 
@@ -155,7 +166,10 @@ std::shared_ptr<arrow::Table> 
parquet___arrow___FileReader__ReadRowGroup2(
     const std::shared_ptr<parquet::arrow::FileReader>& reader, int i,
     const std::vector<int>& column_indices) {
   std::shared_ptr<arrow::Table> table;
-  PARQUET_THROW_NOT_OK(reader->ReadRowGroup(i, column_indices, &table));
+  auto result = RunWithCapturedRIfPossibleVoid(
+      [&]() { return reader->ReadRowGroup(i, column_indices, &table); });
+
+  StopIfNotOk(result);
   return table;
 }
 
@@ -164,7 +178,10 @@ std::shared_ptr<arrow::Table> 
parquet___arrow___FileReader__ReadRowGroups1(
     const std::shared_ptr<parquet::arrow::FileReader>& reader,
     const std::vector<int>& row_groups) {
   std::shared_ptr<arrow::Table> table;
-  PARQUET_THROW_NOT_OK(reader->ReadRowGroups(row_groups, &table));
+  auto result = RunWithCapturedRIfPossibleVoid(
+      [&]() { return reader->ReadRowGroups(row_groups, &table); });
+
+  StopIfNotOk(result);
   return table;
 }
 
@@ -173,7 +190,10 @@ std::shared_ptr<arrow::Table> 
parquet___arrow___FileReader__ReadRowGroups2(
     const std::shared_ptr<parquet::arrow::FileReader>& reader,
     const std::vector<int>& row_groups, const std::vector<int>& 
column_indices) {
   std::shared_ptr<arrow::Table> table;
-  PARQUET_THROW_NOT_OK(reader->ReadRowGroups(row_groups, column_indices, 
&table));
+  auto result = RunWithCapturedRIfPossibleVoid(
+      [&]() { return reader->ReadRowGroups(row_groups, column_indices, 
&table); });
+
+  StopIfNotOk(result);
   return table;
 }
 
@@ -199,7 +219,10 @@ int parquet___arrow___FileReader__num_row_groups(
 std::shared_ptr<arrow::ChunkedArray> parquet___arrow___FileReader__ReadColumn(
     const std::shared_ptr<parquet::arrow::FileReader>& reader, int i) {
   std::shared_ptr<arrow::ChunkedArray> array;
-  PARQUET_THROW_NOT_OK(reader->ReadColumn(i - 1, &array));
+  auto result =
+      RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadColumn(i - 1, 
&array); });
+
+  StopIfNotOk(result);
   return array;
 }
 

Reply via email to