This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 75ef03165d GH-48394: [C++][Parquet] Add arrow::Result version of
parquet::arrow::FileReader::ReadTable() (#48939)
75ef03165d is described below
commit 75ef03165dc7e54aca0f842f54598833ddbd0a01
Author: fenfeng9 <[email protected]>
AuthorDate: Sat Jan 24 06:21:11 2026 +0800
GH-48394: [C++][Parquet] Add arrow::Result version of
parquet::arrow::FileReader::ReadTable() (#48939)
### Rationale for this change
`FileReader::ReadTable` previously returned `Status` and required callers
to pass an `out` parameter.
### What changes are included in this PR?
Introduce a `Result<std::shared_ptr<Table>>`-returning API to allow clearer
error propagation:
- Add new Result-returning `ReadTable()` methods
- Deprecate the old Status versions
- Migrate all callers to use the new API
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
The `Status` version of `FileReader::ReadTable` has been deprecated.
```cpp
virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out);
virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
std::shared_ptr<::arrow::Table>* out);
```
* GitHub Issue: #48394
Lead-authored-by: fenfeng9 <[email protected]>
Co-authored-by: fenfeng9 <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/parquet-glib/arrow-file-reader.cpp | 9 ++--
cpp/examples/arrow/parquet_read_write.cc | 2 +-
.../parquet/parquet_arrow/reader_writer.cc | 2 +-
.../tutorial_examples/file_access_example.cc | 4 +-
.../arrow/dataset/file_parquet_encryption_test.cc | 3 +-
cpp/src/arrow/filesystem/s3fs_benchmark.cc | 3 +-
cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 60 +++++++++-------------
cpp/src/parquet/arrow/reader.cc | 24 +++++++--
cpp/src/parquet/arrow/reader.h | 15 ++++--
cpp/src/parquet/arrow/reader_writer_benchmark.cc | 4 +-
cpp/src/parquet/chunker_internal_test.cc | 3 +-
python/pyarrow/_parquet.pyx | 10 ++--
python/pyarrow/includes/libparquet.pxd | 5 +-
r/src/parquet.cpp | 18 +++----
14 files changed, 81 insertions(+), 81 deletions(-)
diff --git a/c_glib/parquet-glib/arrow-file-reader.cpp
b/c_glib/parquet-glib/arrow-file-reader.cpp
index ddbfde1b4c..7c7d20291a 100644
--- a/c_glib/parquet-glib/arrow-file-reader.cpp
+++ b/c_glib/parquet-glib/arrow-file-reader.cpp
@@ -212,10 +212,11 @@ GArrowTable *
gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, GError
**error)
{
auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader);
- std::shared_ptr<arrow::Table> arrow_table;
- auto status = parquet_arrow_file_reader->ReadTable(&arrow_table);
- if (garrow_error_check(error, status,
"[parquet][arrow][file-reader][read-table]")) {
- return garrow_table_new_raw(&arrow_table);
+ auto arrow_table_result = parquet_arrow_file_reader->ReadTable();
+ if (garrow::check(error,
+ arrow_table_result,
+ "[parquet][arrow][file-reader][read-table]")) {
+ return garrow_table_new_raw(&(*arrow_table_result));
} else {
return NULL;
}
diff --git a/cpp/examples/arrow/parquet_read_write.cc
b/cpp/examples/arrow/parquet_read_write.cc
index 7db13f360a..23483d89dc 100644
--- a/cpp/examples/arrow/parquet_read_write.cc
+++ b/cpp/examples/arrow/parquet_read_write.cc
@@ -38,7 +38,7 @@ arrow::Status ReadFullFile(std::string path_to_file) {
// Read entire file as a single Arrow table
std::shared_ptr<arrow::Table> table;
- ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table));
+ ARROW_ASSIGN_OR_RAISE(table, arrow_reader->ReadTable());
return arrow::Status::OK();
}
diff --git a/cpp/examples/parquet/parquet_arrow/reader_writer.cc
b/cpp/examples/parquet/parquet_arrow/reader_writer.cc
index 448c9ecfb8..5afd54094c 100644
--- a/cpp/examples/parquet/parquet_arrow/reader_writer.cc
+++ b/cpp/examples/parquet/parquet_arrow/reader_writer.cc
@@ -71,7 +71,7 @@ void read_whole_file() {
PARQUET_ASSIGN_OR_THROW(reader,
parquet::arrow::OpenFile(infile,
arrow::default_memory_pool()));
std::shared_ptr<arrow::Table> table;
- PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
+ PARQUET_ASSIGN_OR_THROW(table, reader->ReadTable());
std::cout << "Loaded " << table->num_rows() << " rows in " <<
table->num_columns()
<< " columns." << std::endl;
}
diff --git a/cpp/examples/tutorial_examples/file_access_example.cc
b/cpp/examples/tutorial_examples/file_access_example.cc
index df57174b31..eb53809571 100644
--- a/cpp/examples/tutorial_examples/file_access_example.cc
+++ b/cpp/examples/tutorial_examples/file_access_example.cc
@@ -185,9 +185,9 @@ arrow::Status RunMain() {
// (Doc section: Parquet OpenFile)
// (Doc section: Parquet Read)
- std::shared_ptr<arrow::Table> parquet_table;
// Read the table.
- PARQUET_THROW_NOT_OK(reader->ReadTable(&parquet_table));
+ std::shared_ptr<arrow::Table> parquet_table;
+ PARQUET_ASSIGN_OR_THROW(parquet_table, reader->ReadTable());
// (Doc section: Parquet Read)
// (Doc section: Parquet Write)
diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
index f6de35e3eb..94a07624b6 100644
--- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
@@ -397,8 +397,7 @@ TEST_P(DatasetEncryptionTest, ReadSingleFile) {
parquet::arrow::FileReaderBuilder reader_builder;
ASSERT_OK(reader_builder.Open(input, reader_properties));
ASSERT_OK_AND_ASSIGN(auto arrow_reader, reader_builder.Build());
- std::shared_ptr<Table> table;
- ASSERT_OK(arrow_reader->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(auto table, arrow_reader->ReadTable());
// Check the contents of the table
ASSERT_EQ(table->num_rows(), 2);
diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc
b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
index 0cdcb03702..5fe47618ab 100644
--- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc
+++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
@@ -315,8 +315,7 @@ static void ParquetRead(benchmark::State& st, S3FileSystem*
fs, const std::strin
ASSERT_OK(builder.properties(properties)->Build(&reader));
if (read_strategy == "ReadTable") {
- std::shared_ptr<Table> table;
- ASSERT_OK(reader->ReadTable(column_indices, &table));
+ ASSERT_OK_AND_ASSIGN(auto table, reader->ReadTable(column_indices));
} else {
ASSERT_OK_AND_ASSIGN(auto rb_reader, reader->GetRecordBatchReader(
std::vector<int>{0},
column_indices));
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index d46f963c74..edb59d9de3 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -439,7 +439,7 @@ void DoRoundtrip(const std::shared_ptr<Table>& table,
int64_t row_group_size,
FileReaderBuilder builder;
ASSERT_OK_NO_THROW(builder.Open(std::make_shared<BufferReader>(buffer)));
ASSERT_OK(builder.properties(arrow_reader_properties)->Build(&reader));
- ASSERT_OK_NO_THROW(reader->ReadTable(out));
+ ASSERT_OK_AND_ASSIGN(*out, reader->ReadTable());
}
void CheckConfiguredRoundtrip(
@@ -486,10 +486,10 @@ void DoSimpleRoundtrip(const std::shared_ptr<Table>&
table, bool use_threads,
reader->set_use_threads(use_threads);
if (column_subset.size() > 0) {
- ASSERT_OK_NO_THROW(reader->ReadTable(column_subset, out));
+ ASSERT_OK_AND_ASSIGN(*out, reader->ReadTable(column_subset));
} else {
// Read everything
- ASSERT_OK_NO_THROW(reader->ReadTable(out));
+ ASSERT_OK_AND_ASSIGN(*out, reader->ReadTable());
}
}
@@ -709,7 +709,7 @@ class ParquetIOTestBase : public ::testing::Test {
void ReadTableFromFile(std::unique_ptr<FileReader> reader, bool
expect_metadata,
std::shared_ptr<Table>* out) {
- ASSERT_OK_NO_THROW(reader->ReadTable(out));
+ ASSERT_OK_AND_ASSIGN(*out, reader->ReadTable());
auto key_value_metadata =
reader->parquet_reader()->metadata()->key_value_metadata().get();
if (!expect_metadata) {
@@ -2801,8 +2801,7 @@ TEST(TestArrowReadWrite, ReadCoalescedColumnSubset) {
{0, 4, 8, 10}, {0, 1, 2, 3}, {5, 17, 18, 19}};
for (std::vector<int>& column_subset : column_subsets) {
- std::shared_ptr<Table> result;
- ASSERT_OK(reader->ReadTable(column_subset, &result));
+ ASSERT_OK_AND_ASSIGN(auto result, reader->ReadTable(column_subset));
std::vector<std::shared_ptr<::arrow::ChunkedArray>> ex_columns;
std::vector<std::shared_ptr<::arrow::Field>> ex_fields;
@@ -2839,8 +2838,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) {
::arrow::default_memory_pool()));
// Read everything
- std::shared_ptr<Table> result;
- ASSERT_OK_NO_THROW(reader->ReadTable(&result));
+ ASSERT_OK_AND_ASSIGN(auto result, reader->ReadTable());
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result));
// Read 1 record at a time
@@ -3579,8 +3577,7 @@ void DoNestedValidate(const
std::shared_ptr<::arrow::DataType>& inner_type,
ASSERT_OK(reader_builder.Build(&reader));
ARROW_SCOPED_TRACE("Parquet schema: ",
reader->parquet_reader()->metadata()->schema()->ToString());
- std::shared_ptr<Table> result;
- ASSERT_OK_NO_THROW(reader->ReadTable(&result));
+ ASSERT_OK_AND_ASSIGN(auto result, reader->ReadTable());
if (inner_type->id() == ::arrow::Type::DATE64 ||
inner_type->id() == ::arrow::Type::TIMESTAMP ||
@@ -4036,8 +4033,7 @@ class TestNestedSchemaRead : public
::testing::TestWithParam<Repetition::type> {
TEST_F(TestNestedSchemaRead, ReadIntoTableFull) {
ASSERT_NO_FATAL_FAILURE(CreateSimpleNestedParquet(Repetition::OPTIONAL));
- std::shared_ptr<Table> table;
- ASSERT_OK_NO_THROW(reader_->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(auto table, reader_->ReadTable());
ASSERT_EQ(table->num_rows(), NUM_SIMPLE_TEST_ROWS);
ASSERT_EQ(table->num_columns(), 2);
ASSERT_EQ(table->schema()->field(0)->type()->num_fields(), 2);
@@ -4080,7 +4076,7 @@ TEST_F(TestNestedSchemaRead, ReadTablePartial) {
std::shared_ptr<Table> table;
// columns: {group1.leaf1, leaf3}
- ASSERT_OK_NO_THROW(reader_->ReadTable({0, 2}, &table));
+ ASSERT_OK_AND_ASSIGN(table, reader_->ReadTable({0, 2}));
ASSERT_EQ(table->num_rows(), NUM_SIMPLE_TEST_ROWS);
ASSERT_EQ(table->num_columns(), 2);
ASSERT_EQ(table->schema()->field(0)->name(), "group1");
@@ -4098,7 +4094,7 @@ TEST_F(TestNestedSchemaRead, ReadTablePartial) {
ASSERT_NO_FATAL_FAILURE(ValidateTableArrayTypes(*table));
// columns: {group1.leaf1, group1.leaf2}
- ASSERT_OK_NO_THROW(reader_->ReadTable({0, 1}, &table));
+ ASSERT_OK_AND_ASSIGN(table, reader_->ReadTable({0, 1}));
ASSERT_EQ(table->num_rows(), NUM_SIMPLE_TEST_ROWS);
ASSERT_EQ(table->num_columns(), 1);
ASSERT_EQ(table->schema()->field(0)->name(), "group1");
@@ -4106,7 +4102,7 @@ TEST_F(TestNestedSchemaRead, ReadTablePartial) {
ASSERT_NO_FATAL_FAILURE(ValidateTableArrayTypes(*table));
// columns: {leaf3}
- ASSERT_OK_NO_THROW(reader_->ReadTable({2}, &table));
+ ASSERT_OK_AND_ASSIGN(table, reader_->ReadTable({2}));
ASSERT_EQ(table->num_rows(), NUM_SIMPLE_TEST_ROWS);
ASSERT_EQ(table->num_columns(), 1);
ASSERT_EQ(table->schema()->field(0)->name(), "leaf3");
@@ -4114,7 +4110,7 @@ TEST_F(TestNestedSchemaRead, ReadTablePartial) {
ASSERT_NO_FATAL_FAILURE(ValidateTableArrayTypes(*table));
// Test with different ordering
- ASSERT_OK_NO_THROW(reader_->ReadTable({2, 0}, &table));
+ ASSERT_OK_AND_ASSIGN(table, reader_->ReadTable({2, 0}));
ASSERT_EQ(table->num_rows(), NUM_SIMPLE_TEST_ROWS);
ASSERT_EQ(table->num_columns(), 2);
ASSERT_EQ(table->schema()->field(0)->name(), "leaf3");
@@ -4135,8 +4131,7 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) {
int num_rows = SMALL_SIZE * (depth + 2);
ASSERT_NO_FATAL_FAILURE(CreateMultiLevelNestedParquet(num_trees, depth,
num_children,
num_rows, GetParam()));
- std::shared_ptr<Table> table;
- ASSERT_OK_NO_THROW(reader_->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(auto table, reader_->ReadTable());
ASSERT_EQ(table->num_columns(), num_trees);
ASSERT_EQ(table->num_rows(), num_rows);
@@ -4184,8 +4179,8 @@ void TryReadDataFile(const std::string& path,
Status s;
auto reader_result = FileReader::Make(pool,
ParquetFileReader::OpenFile(path, false));
if (reader_result.ok()) {
- std::shared_ptr<::arrow::Table> table;
- s = (*reader_result)->ReadTable(&table);
+ auto table_result = (*reader_result)->ReadTable();
+ s = table_result.status();
} else {
s = reader_result.status();
}
@@ -4261,7 +4256,7 @@ TEST(TestArrowReaderAdHoc,
LARGE_MEMORY_TEST(LargeStringColumn)) {
auto reader =
ParquetFileReader::Open(std::make_shared<BufferReader>(tables_buffer));
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
FileReader::Make(default_memory_pool(),
std::move(reader)));
- ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(table, arrow_reader->ReadTable());
ASSERT_OK(table->ValidateFull());
// ARROW-9297: ensure RecordBatchReader also works
@@ -4365,8 +4360,7 @@ TEST(TestArrowReaderAdHoc, LegacyTwoLevelList) {
// Verify Arrow schema and data
ASSERT_OK_AND_ASSIGN(auto reader,
FileReader::Make(default_memory_pool(),
std::move(file_reader)));
- std::shared_ptr<Table> table;
- ASSERT_OK(reader->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(auto table, reader->ReadTable());
ASSERT_OK(table->ValidateFull());
AssertTablesEqual(*expected_table, *table);
};
@@ -4429,8 +4423,7 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) {
ASSERT_OK_AND_ASSIGN(auto arrow_reader,
FileReader::Make(pool,
ParquetFileReader::OpenFile(path, false)));
- std::shared_ptr<::arrow::Table> table;
- ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(auto table, arrow_reader->ReadTable());
std::shared_ptr<::arrow::Schema> schema;
ASSERT_OK_NO_THROW(arrow_reader->GetSchema(&schema));
@@ -4495,8 +4488,7 @@ TEST(TestArrowReaderAdHoc, ReadFloat16Files) {
ASSERT_OK_AND_ASSIGN(
auto reader, FileReader::Make(pool, ParquetFileReader::OpenFile(path,
false)));
- std::shared_ptr<::arrow::Table> table;
- ASSERT_OK_NO_THROW(reader->ReadTable(&table));
+ ASSERT_OK_AND_ASSIGN(auto table, reader->ReadTable());
std::shared_ptr<::arrow::Schema> schema;
ASSERT_OK_NO_THROW(reader->GetSchema(&schema));
@@ -4907,8 +4899,7 @@ class TestArrowReadDictionary : public
::testing::TestWithParam<double> {
void CheckReadWholeFile(const Table& expected) {
ASSERT_OK_AND_ASSIGN(auto reader, GetReader());
- std::shared_ptr<Table> actual;
- ASSERT_OK_NO_THROW(reader->ReadTable(&actual));
+ ASSERT_OK_AND_ASSIGN(auto actual, reader->ReadTable());
::arrow::AssertTablesEqual(expected, *actual, /*same_chunk_layout=*/false);
}
@@ -5005,8 +4996,7 @@ TEST_P(TestArrowReadDictionary, IncrementalReads) {
// Read in one shot
ASSERT_OK_AND_ASSIGN(std::unique_ptr<FileReader> reader, GetReader());
- std::shared_ptr<Table> expected;
- ASSERT_OK_NO_THROW(reader->ReadTable(&expected));
+ ASSERT_OK_AND_ASSIGN(auto expected, reader->ReadTable());
ASSERT_OK_AND_ASSIGN(reader, GetReader());
std::unique_ptr<ColumnReader> col;
@@ -5138,7 +5128,7 @@ class TestArrowReadDeltaEncoding : public ::testing::Test
{
ASSERT_OK_AND_ASSIGN(
auto parquet_reader,
FileReader::Make(pool, ParquetFileReader::OpenFile(file, false)));
- ASSERT_OK(parquet_reader->ReadTable(out));
+ ASSERT_OK_AND_ASSIGN(*out, parquet_reader->ReadTable());
ASSERT_OK((*out)->ValidateFull());
}
@@ -5340,8 +5330,7 @@ TEST_P(TestNestedSchemaFilteredReader, ReadWrite) {
FileReaderBuilder builder;
ASSERT_OK_NO_THROW(builder.Open(std::make_shared<BufferReader>(buffer)));
ASSERT_OK(builder.properties(default_arrow_reader_properties())->Build(&reader));
- std::shared_ptr<::arrow::Table> read_table;
- ASSERT_OK_NO_THROW(reader->ReadTable(GetParam().indices_to_read,
&read_table));
+ ASSERT_OK_AND_ASSIGN(auto read_table,
reader->ReadTable(GetParam().indices_to_read));
std::shared_ptr<::arrow::Array> expected =
ArrayFromJSON(GetParam().expected_schema, GetParam().read_data);
@@ -5817,10 +5806,9 @@ TEST(TestArrowReadWrite, MultithreadedWrite) {
ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish());
// Read to verify the data.
- std::shared_ptr<Table> result;
ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer), pool));
- ASSERT_OK_NO_THROW(reader->ReadTable(&result));
+ ASSERT_OK_AND_ASSIGN(auto result, reader->ReadTable());
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result));
}
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 6b70087077..434430a875 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -202,9 +202,12 @@ class FileReaderImpl : public FileReader {
std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) override;
- Status ReadTable(const std::vector<int>& indices,
- std::shared_ptr<Table>* out) override {
- return ReadRowGroups(Iota(reader_->metadata()->num_row_groups()), indices,
out);
+ Result<std::shared_ptr<Table>> ReadTable(
+ const std::vector<int>& column_indices) override {
+ std::shared_ptr<Table> table;
+ RETURN_NOT_OK(ReadRowGroups(Iota(reader_->metadata()->num_row_groups()),
+ column_indices, &table));
+ return table;
}
Status GetFieldReader(int i,
@@ -305,8 +308,8 @@ class FileReaderImpl : public FileReader {
return ReadColumn(i, Iota(reader_->metadata()->num_row_groups()), out);
}
- Status ReadTable(std::shared_ptr<Table>* table) override {
- return ReadTable(Iota(reader_->metadata()->num_columns()), table);
+ Result<std::shared_ptr<Table>> ReadTable() override {
+ return ReadTable(Iota(reader_->metadata()->num_columns()));
}
Status ReadRowGroups(const std::vector<int>& row_groups,
@@ -1339,6 +1342,17 @@ Status FileReader::GetRecordBatchReader(const
std::vector<int>& row_group_indice
return Status::OK();
}
+Status FileReader::ReadTable(std::shared_ptr<Table>* out) {
+ ARROW_ASSIGN_OR_RAISE(*out, ReadTable());
+ return Status::OK();
+}
+
+Status FileReader::ReadTable(const std::vector<int>& column_indices,
+ std::shared_ptr<Table>* out) {
+ ARROW_ASSIGN_OR_RAISE(*out, ReadTable(column_indices));
+ return Status::OK();
+}
+
Status FileReader::Make(::arrow::MemoryPool* pool,
std::unique_ptr<ParquetFileReader> reader,
const ArrowReaderProperties& properties,
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 54620b3d0f..d0665ea310 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -235,7 +235,11 @@ class PARQUET_EXPORT FileReader {
int64_t rows_to_readahead = 0) = 0;
/// Read all columns into a Table
- virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0;
+ virtual ::arrow::Result<std::shared_ptr<::arrow::Table>> ReadTable() = 0;
+
+ /// \deprecated Deprecated in 24.0.0. Use arrow::Result version instead.
+ ARROW_DEPRECATED("Deprecated in 24.0.0. Use arrow::Result version instead.")
+ ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out);
/// \brief Read the given columns into a Table
///
@@ -254,8 +258,13 @@ class PARQUET_EXPORT FileReader {
/// manifest().schema_fields to get the top level fields, and then walk the
/// tree to identify the relevant leaf fields and access its column_index.
/// To get the total number of leaf fields, use FileMetadata.num_columns().
- virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices,
- std::shared_ptr<::arrow::Table>* out) = 0;
+ virtual ::arrow::Result<std::shared_ptr<::arrow::Table>> ReadTable(
+ const std::vector<int>& column_indices) = 0;
+
+ /// \deprecated Deprecated in 24.0.0. Use arrow::Result version instead.
+ ARROW_DEPRECATED("Deprecated in 24.0.0. Use arrow::Result version instead.")
+ ::arrow::Status ReadTable(const std::vector<int>& column_indices,
+ std::shared_ptr<::arrow::Table>* out);
virtual ::arrow::Status ReadRowGroup(int i, const std::vector<int>&
column_indices,
std::shared_ptr<::arrow::Table>* out) =
0;
diff --git a/cpp/src/parquet/arrow/reader_writer_benchmark.cc
b/cpp/src/parquet/arrow/reader_writer_benchmark.cc
index cfb458ddb3..7523a781d8 100644
--- a/cpp/src/parquet/arrow/reader_writer_benchmark.cc
+++ b/cpp/src/parquet/arrow/reader_writer_benchmark.cc
@@ -301,8 +301,8 @@ static void BenchmarkReadTable(::benchmark::State& state,
const Table& table,
EXIT_NOT_OK(arrow_reader_result.status());
auto arrow_reader = std::move(*arrow_reader_result);
- std::shared_ptr<Table> table;
- EXIT_NOT_OK(arrow_reader->ReadTable(&table));
+ auto table_result = arrow_reader->ReadTable();
+ EXIT_NOT_OK(table_result.status());
}
if (num_values == -1) {
diff --git a/cpp/src/parquet/chunker_internal_test.cc
b/cpp/src/parquet/chunker_internal_test.cc
index 1b028cb1d6..2469d54afb 100644
--- a/cpp/src/parquet/chunker_internal_test.cc
+++ b/cpp/src/parquet/chunker_internal_test.cc
@@ -319,7 +319,6 @@ Result<std::shared_ptr<Table>> ConcatAndCombine(
}
Result<std::shared_ptr<Table>> ReadTableFromBuffer(const
std::shared_ptr<Buffer>& data) {
- std::shared_ptr<Table> result;
FileReaderBuilder builder;
std::unique_ptr<FileReader> reader;
auto props = default_arrow_reader_properties();
@@ -329,7 +328,7 @@ Result<std::shared_ptr<Table>> ReadTableFromBuffer(const
std::shared_ptr<Buffer>
RETURN_NOT_OK(builder.memory_pool(::arrow::default_memory_pool())
->properties(props)
->Build(&reader));
- RETURN_NOT_OK(reader->ReadTable(&result));
+ ARROW_ASSIGN_OR_RAISE(auto result, reader->ReadTable());
return result;
}
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 66e0294010..c1c20026db 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -1847,7 +1847,7 @@ cdef class ParquetReader(_Weakrefable):
table : pyarrow.Table
"""
cdef:
- shared_ptr[CTable] ctable
+ CResult[shared_ptr[CTable]] table_result
vector[int] c_column_indices
self.set_use_threads(use_threads)
@@ -1857,14 +1857,12 @@ cdef class ParquetReader(_Weakrefable):
c_column_indices.push_back(index)
with nogil:
- check_status(self.reader.get()
- .ReadTable(c_column_indices, &ctable))
+ table_result = self.reader.get().ReadTable(c_column_indices)
else:
# Read all columns
with nogil:
- check_status(self.reader.get()
- .ReadTable(&ctable))
- return pyarrow_wrap_table(ctable)
+ table_result = self.reader.get().ReadTable()
+ return pyarrow_wrap_table(GetResultValue(table_result))
def scan_contents(self, column_indices=None, batch_size=65536):
"""
diff --git a/python/pyarrow/includes/libparquet.pxd
b/python/pyarrow/includes/libparquet.pxd
index 81901a00ac..c19977396f 100644
--- a/python/pyarrow/includes/libparquet.pxd
+++ b/python/pyarrow/includes/libparquet.pxd
@@ -548,9 +548,8 @@ cdef extern from "parquet/arrow/reader.h" namespace
"parquet::arrow" nogil:
const
vector[int]& column_indices)
CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const
vector[int]& row_group_indices)
- CStatus ReadTable(shared_ptr[CTable]* out)
- CStatus ReadTable(const vector[int]& column_indices,
- shared_ptr[CTable]* out)
+ CResult[shared_ptr[CTable]] ReadTable()
+ CResult[shared_ptr[CTable]] ReadTable(const vector[int]&
column_indices)
CStatus ScanContents(vector[int] columns, int32_t column_batch_size,
int64_t* num_rows)
diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
index c32d064059..3633c51d45 100644
--- a/r/src/parquet.cpp
+++ b/r/src/parquet.cpp
@@ -130,24 +130,18 @@ std::shared_ptr<parquet::arrow::FileReader>
parquet___arrow___FileReader__OpenFi
// [[parquet::export]]
std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable1(
const std::shared_ptr<parquet::arrow::FileReader>& reader) {
- std::shared_ptr<arrow::Table> table;
- auto result =
- RunWithCapturedRIfPossibleVoid([&]() { return reader->ReadTable(&table);
});
-
- StopIfNotOk(result);
- return table;
+ auto result = RunWithCapturedRIfPossible<std::shared_ptr<arrow::Table>>(
+ [&]() { return reader->ReadTable(); });
+ return ValueOrStop(result);
}
// [[parquet::export]]
std::shared_ptr<arrow::Table> parquet___arrow___FileReader__ReadTable2(
const std::shared_ptr<parquet::arrow::FileReader>& reader,
const std::vector<int>& column_indices) {
- std::shared_ptr<arrow::Table> table;
- auto result = RunWithCapturedRIfPossibleVoid(
- [&]() { return reader->ReadTable(column_indices, &table); });
-
- StopIfNotOk(result);
- return table;
+ auto result = RunWithCapturedRIfPossible<std::shared_ptr<arrow::Table>>(
+ [&]() { return reader->ReadTable(column_indices); });
+ return ValueOrStop(result);
}
// [[parquet::export]]