This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 33e8cbb2ea GH-44784: [C++][Parquet] Add `arrow::Result` version of
`parquet::arrow::OpenFile()` (#44785)
33e8cbb2ea is described below
commit 33e8cbb2eae70efb5ec4efc07e8fd79edb1057cf
Author: Sutou Kouhei <[email protected]>
AuthorDate: Wed Nov 20 10:54:54 2024 +0900
GH-44784: [C++][Parquet] Add `arrow::Result` version of
`parquet::arrow::OpenFile()` (#44785)
### Rationale for this change
We're migrating `arrow::Status` + output variable API to `arrow::Result`
API.
### What changes are included in this PR?
* Add `arrow::Result<std::unique_ptr<FileReader>>
parquet::arrow::OpenFile()`
* Deprecate `arrow::Status parquet::arrow::OpenFile()`
* Use the added `arrow::Result` version in our code base
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #44784
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/parquet-glib/arrow-file-reader.cpp | 26 +++++------
cpp/examples/arrow/parquet_read_write.cc | 2 +-
.../parquet/parquet_arrow/reader_writer.cc | 16 +++----
cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 50 +++++++++-------------
cpp/src/parquet/arrow/reader.cc | 7 ++-
cpp/src/parquet/arrow/reader.h | 10 +++++
cpp/src/parquet/arrow/reconstruct_internal_test.cc | 4 +-
7 files changed, 63 insertions(+), 52 deletions(-)
diff --git a/c_glib/parquet-glib/arrow-file-reader.cpp
b/c_glib/parquet-glib/arrow-file-reader.cpp
index 4996d78627..692fd0ca0a 100644
--- a/c_glib/parquet-glib/arrow-file-reader.cpp
+++ b/c_glib/parquet-glib/arrow-file-reader.cpp
@@ -134,12 +134,13 @@
gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, GError *
{
auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(source);
auto arrow_memory_pool = arrow::default_memory_pool();
- std::unique_ptr<parquet::arrow::FileReader> parquet_arrow_file_reader;
- auto status = parquet::arrow::OpenFile(arrow_random_access_file,
- arrow_memory_pool,
- &parquet_arrow_file_reader);
- if (garrow_error_check(error, status,
"[parquet][arrow][file-reader][new-arrow]")) {
- return
gparquet_arrow_file_reader_new_raw(parquet_arrow_file_reader.release());
+ auto parquet_arrow_file_reader_result =
+ parquet::arrow::OpenFile(arrow_random_access_file, arrow_memory_pool);
+ if (garrow::check(error,
+ parquet_arrow_file_reader_result,
+ "[parquet][arrow][file-reader][new-arrow]")) {
+ return gparquet_arrow_file_reader_new_raw(
+ parquet_arrow_file_reader_result->release());
} else {
return NULL;
}
@@ -168,12 +169,13 @@ gparquet_arrow_file_reader_new_path(const gchar *path,
GError **error)
std::shared_ptr<arrow::io::RandomAccessFile> arrow_random_access_file =
arrow_memory_mapped_file.ValueOrDie();
auto arrow_memory_pool = arrow::default_memory_pool();
- std::unique_ptr<parquet::arrow::FileReader> parquet_arrow_file_reader;
- auto status = parquet::arrow::OpenFile(arrow_random_access_file,
- arrow_memory_pool,
- &parquet_arrow_file_reader);
- if (garrow::check(error, status, "[parquet][arrow][file-reader][new-path]"))
{
- return
gparquet_arrow_file_reader_new_raw(parquet_arrow_file_reader.release());
+ auto parquet_arrow_file_reader_result =
+ parquet::arrow::OpenFile(arrow_random_access_file, arrow_memory_pool);
+ if (garrow::check(error,
+ parquet_arrow_file_reader_result,
+ "[parquet][arrow][file-reader][new-path]")) {
+ return gparquet_arrow_file_reader_new_raw(
+ parquet_arrow_file_reader_result->release());
} else {
return NULL;
}
diff --git a/cpp/examples/arrow/parquet_read_write.cc
b/cpp/examples/arrow/parquet_read_write.cc
index 3b8b4c2212..a07c10fda5 100644
--- a/cpp/examples/arrow/parquet_read_write.cc
+++ b/cpp/examples/arrow/parquet_read_write.cc
@@ -34,7 +34,7 @@ arrow::Status ReadFullFile(std::string path_to_file) {
// Open Parquet file reader
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
- ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, &arrow_reader));
+ ARROW_ASSIGN_OR_RAISE(arrow_reader, parquet::arrow::OpenFile(input, pool));
// Read entire file as a single Arrow table
std::shared_ptr<arrow::Table> table;
diff --git a/cpp/examples/parquet/parquet_arrow/reader_writer.cc
b/cpp/examples/parquet/parquet_arrow/reader_writer.cc
index f5d96ec16c..448c9ecfb8 100644
--- a/cpp/examples/parquet/parquet_arrow/reader_writer.cc
+++ b/cpp/examples/parquet/parquet_arrow/reader_writer.cc
@@ -68,8 +68,8 @@ void read_whole_file() {
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
- PARQUET_THROW_NOT_OK(
- parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+ PARQUET_ASSIGN_OR_THROW(reader,
+ parquet::arrow::OpenFile(infile,
arrow::default_memory_pool()));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
std::cout << "Loaded " << table->num_rows() << " rows in " <<
table->num_columns()
@@ -85,8 +85,8 @@ void read_single_rowgroup() {
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
- PARQUET_THROW_NOT_OK(
- parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+ PARQUET_ASSIGN_OR_THROW(reader,
+ parquet::arrow::OpenFile(infile,
arrow::default_memory_pool()));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->RowGroup(0)->ReadTable(&table));
std::cout << "Loaded " << table->num_rows() << " rows in " <<
table->num_columns()
@@ -102,8 +102,8 @@ void read_single_column() {
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
- PARQUET_THROW_NOT_OK(
- parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+ PARQUET_ASSIGN_OR_THROW(reader,
+ parquet::arrow::OpenFile(infile,
arrow::default_memory_pool()));
std::shared_ptr<arrow::ChunkedArray> array;
PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array));
PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
@@ -122,8 +122,8 @@ void read_single_column_chunk() {
arrow::default_memory_pool()));
std::unique_ptr<parquet::arrow::FileReader> reader;
- PARQUET_THROW_NOT_OK(
- parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+ PARQUET_ASSIGN_OR_THROW(reader,
+ parquet::arrow::OpenFile(infile,
arrow::default_memory_pool()));
std::shared_ptr<arrow::ChunkedArray> array;
PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array));
PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 73974f9b2a..b5fd2bc255 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -448,9 +448,8 @@ void DoSimpleRoundtrip(const std::shared_ptr<Table>& table,
bool use_threads,
ASSERT_NO_FATAL_FAILURE(
WriteTableToBuffer(table, row_group_size, arrow_properties, &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
reader->set_use_threads(use_threads);
if (column_subset.size() > 0) {
@@ -1095,8 +1094,7 @@ TYPED_TEST(TestParquetIO,
SingleColumnTableRequiredChunkedWriteArrowIO) {
auto source = std::make_shared<BufferReader>(pbuffer);
std::shared_ptr<::arrow::Table> out;
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(source, ::arrow::default_memory_pool(),
&reader));
+ ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(source,
::arrow::default_memory_pool()));
ASSERT_NO_FATAL_FAILURE(this->ReadTableFromFile(std::move(reader), &out));
ASSERT_EQ(1, out->num_columns());
ASSERT_EQ(values->length(), out->num_rows());
@@ -2295,9 +2293,8 @@ TEST(TestArrowReadWrite, ReadSingleRowGroup) {
ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, num_rows / 2,
default_arrow_writer_properties(), &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
ASSERT_EQ(2, reader->num_row_groups());
@@ -2357,9 +2354,8 @@ TEST(TestArrowReadWrite, ReadTableManually) {
ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(expected, num_rows / 2,
default_arrow_writer_properties(), &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
ASSERT_EQ(2, reader->num_row_groups());
@@ -2476,9 +2472,8 @@ TEST(TestArrowReadWrite,
CoalescedReadsAndNonCoalescedReads) {
ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(expected, num_rows / 2,
default_arrow_writer_properties(), &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
ASSERT_EQ(2, reader->num_row_groups());
@@ -2594,9 +2589,8 @@ TEST(TestArrowReadWrite, ScanContents) {
ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, num_rows / 2,
default_arrow_writer_properties(), &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
int64_t num_rows_returned = 0;
ASSERT_OK_NO_THROW(reader->ScanContents({}, 256, &num_rows_returned));
@@ -2689,9 +2683,8 @@ TEST(TestArrowReadWrite, ListLargeRecords) {
ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, row_group_size,
default_arrow_writer_properties(), &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
// Read everything
std::shared_ptr<Table> result;
@@ -2699,8 +2692,8 @@ TEST(TestArrowReadWrite, ListLargeRecords) {
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result));
// Read 1 record at a time
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(reader, OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
std::unique_ptr<ColumnReader> col_reader;
ASSERT_OK(reader->GetColumn(0, &col_reader));
@@ -2974,9 +2967,8 @@ TEST(ArrowReadWrite, DecimalStats) {
ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, /*row_group_size=*/100,
default_arrow_writer_properties(), &buffer));
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
std::shared_ptr<Scalar> min, max;
ReadSingleColumnFileStatistics(std::move(reader), &min, &max);
@@ -3575,8 +3567,8 @@ class TestNestedSchemaRead : public
::testing::TestWithParam<Repetition::type> {
void InitReader() {
ASSERT_OK_AND_ASSIGN(auto buffer, nested_parquet_->Finish());
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer),
- ::arrow::default_memory_pool(), &reader_));
+ ASSERT_OK_AND_ASSIGN(reader_,
OpenFile(std::make_shared<BufferReader>(buffer),
+ ::arrow::default_memory_pool()));
}
void InitNewParquetFile(const std::shared_ptr<GroupNode>& schema, int
num_rows) {
@@ -5344,8 +5336,8 @@ TEST(TestArrowReadWrite, MultithreadedWrite) {
// Read to verify the data.
std::shared_ptr<Table> result;
- std::unique_ptr<FileReader> reader;
- ASSERT_OK_NO_THROW(OpenFile(std::make_shared<BufferReader>(buffer), pool,
&reader));
+ ASSERT_OK_AND_ASSIGN(auto reader,
+ OpenFile(std::make_shared<BufferReader>(buffer), pool));
ASSERT_OK_NO_THROW(reader->ReadTable(&result));
ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result));
}
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index 4f57c3f4f5..3002d90b5f 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -1372,9 +1372,14 @@ Result<std::unique_ptr<FileReader>>
FileReaderBuilder::Build() {
Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile> file,
MemoryPool* pool,
std::unique_ptr<FileReader>* reader) {
+ return OpenFile(std::move(file), pool).Value(reader);
+}
+
+Result<std::unique_ptr<FileReader>> OpenFile(
+ std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool) {
FileReaderBuilder builder;
RETURN_NOT_OK(builder.Open(std::move(file)));
- return builder.memory_pool(pool)->Build(reader);
+ return builder.memory_pool(pool)->Build();
}
namespace internal {
diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h
index 6e46ca43f7..ec996a5afa 100644
--- a/cpp/src/parquet/arrow/reader.h
+++ b/cpp/src/parquet/arrow/reader.h
@@ -357,11 +357,21 @@ class PARQUET_EXPORT FileReaderBuilder {
/// \brief Build FileReader from Arrow file and MemoryPool
///
/// Advanced settings are supported through the FileReaderBuilder class.
+///
+/// \deprecated Deprecated in 19.0.0. Use arrow::Result version instead.
+ARROW_DEPRECATED("Deprecated in 19.0.0. Use arrow::Result version instead.")
PARQUET_EXPORT
::arrow::Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile>,
::arrow::MemoryPool* allocator,
std::unique_ptr<FileReader>* reader);
+/// \brief Build FileReader from Arrow file and MemoryPool
+///
+/// Advanced settings are supported through the FileReaderBuilder class.
+PARQUET_EXPORT
+::arrow::Result<std::unique_ptr<FileReader>> OpenFile(
+ std::shared_ptr<::arrow::io::RandomAccessFile>, ::arrow::MemoryPool*
allocator);
+
/// @}
PARQUET_EXPORT
diff --git a/cpp/src/parquet/arrow/reconstruct_internal_test.cc
b/cpp/src/parquet/arrow/reconstruct_internal_test.cc
index 4e1f421498..ecdbcc5a3d 100644
--- a/cpp/src/parquet/arrow/reconstruct_internal_test.cc
+++ b/cpp/src/parquet/arrow/reconstruct_internal_test.cc
@@ -189,7 +189,9 @@ class FileTester {
protected:
Status Open(std::shared_ptr<Buffer> buffer, MemoryPool* pool) {
pool_ = pool;
- return OpenFile(std::make_shared<BufferReader>(buffer), pool_,
&file_reader_);
+ ARROW_ASSIGN_OR_RAISE(file_reader_,
+ OpenFile(std::make_shared<BufferReader>(buffer),
pool_));
+ return Status::OK();
}
MemoryPool* pool_;