This is an automated email from the ASF dual-hosted git repository.
yibocai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9565ba20b2 ARROW-16711: [C++] Remove deprecated ORC APIs (#13286)
9565ba20b2 is described below
commit 9565ba20b29de639c8a36868b4a937d3bf600d9d
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jun 2 11:19:30 2022 +0200
ARROW-16711: [C++] Remove deprecated ORC APIs (#13286)
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Yibo Cai <[email protected]>
---
cpp/src/arrow/adapters/orc/adapter.cc | 209 +++++++++++-----------------------
cpp/src/arrow/adapters/orc/adapter.h | 99 ----------------
cpp/src/arrow/adapters/orc/util.cc | 95 ++++++----------
cpp/src/arrow/adapters/orc/util.h | 5 +-
4 files changed, 101 insertions(+), 307 deletions(-)
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 7d3309e4e7..a95b8325f5 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -282,16 +282,16 @@ class ORCFileReader::Impl {
std::string GetSerializedFileTail() { return
reader_->getSerializedFileTail(); }
- Status ReadSchema(std::shared_ptr<Schema>* out) {
+ Result<std::shared_ptr<Schema>> ReadSchema() {
const liborc::Type& type = reader_->getType();
- return GetArrowSchema(type, out);
+ return GetArrowSchema(type);
}
- Status ReadSchema(const liborc::RowReaderOptions& opts,
std::shared_ptr<Schema>* out) {
+ Result<std::shared_ptr<Schema>> ReadSchema(const liborc::RowReaderOptions&
opts) {
std::unique_ptr<liborc::RowReader> row_reader;
ORC_CATCH_NOT_OK(row_reader = reader_->createRowReader(opts));
const liborc::Type& type = row_reader->getSelectedType();
- return GetArrowSchema(type, out);
+ return GetArrowSchema(type);
}
Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() {
@@ -303,7 +303,7 @@ class ORCFileReader::Impl {
return std::const_pointer_cast<const KeyValueMetadata>(metadata);
}
- Status GetArrowSchema(const liborc::Type& type, std::shared_ptr<Schema>*
out) {
+ Result<std::shared_ptr<Schema>> GetArrowSchema(const liborc::Type& type) {
if (type.getKind() != liborc::STRUCT) {
return Status::NotImplemented(
"Only ORC files with a top-level struct "
@@ -311,79 +311,71 @@ class ORCFileReader::Impl {
}
int size = static_cast<int>(type.getSubtypeCount());
std::vector<std::shared_ptr<Field>> fields;
+ fields.reserve(size);
for (int child = 0; child < size; ++child) {
- std::shared_ptr<DataType> elemtype;
- RETURN_NOT_OK(GetArrowType(type.getSubtype(child), &elemtype));
+ ARROW_ASSIGN_OR_RAISE(auto elemtype,
GetArrowType(type.getSubtype(child)));
std::string name = type.getFieldName(child);
- fields.push_back(field(name, elemtype));
+ fields.push_back(field(std::move(name), std::move(elemtype)));
}
ARROW_ASSIGN_OR_RAISE(auto metadata, ReadMetadata());
- *out = std::make_shared<Schema>(std::move(fields), std::move(metadata));
- return Status::OK();
+ return std::make_shared<Schema>(std::move(fields), std::move(metadata));
}
- Status Read(std::shared_ptr<Table>* out) {
+ Result<std::shared_ptr<Table>> Read() {
liborc::RowReaderOptions opts;
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadTable(opts, schema, out);
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema());
+ return ReadTable(opts, schema);
}
- Status Read(const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>*
out) {
+ Result<std::shared_ptr<Table>> Read(const std::shared_ptr<Schema>& schema) {
liborc::RowReaderOptions opts;
- return ReadTable(opts, schema, out);
+ return ReadTable(opts, schema);
}
- Status Read(const std::vector<int>& include_indices, std::shared_ptr<Table>*
out) {
+ Result<std::shared_ptr<Table>> Read(const std::vector<int>& include_indices)
{
liborc::RowReaderOptions opts;
RETURN_NOT_OK(SelectIndices(&opts, include_indices));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadTable(opts, schema, out);
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
+ return ReadTable(opts, schema);
}
- Status Read(const std::vector<std::string>& include_names,
- std::shared_ptr<Table>* out) {
+ Result<std::shared_ptr<Table>> Read(const std::vector<std::string>&
include_names) {
liborc::RowReaderOptions opts;
RETURN_NOT_OK(SelectNames(&opts, include_names));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadTable(opts, schema, out);
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
+ return ReadTable(opts, schema);
}
- Status Read(const std::shared_ptr<Schema>& schema,
- const std::vector<int>& include_indices, std::shared_ptr<Table>*
out) {
+ Result<std::shared_ptr<Table>> Read(const std::shared_ptr<Schema>& schema,
+ const std::vector<int>& include_indices)
{
liborc::RowReaderOptions opts;
RETURN_NOT_OK(SelectIndices(&opts, include_indices));
- return ReadTable(opts, schema, out);
+ return ReadTable(opts, schema);
}
- Status ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out) {
+ Result<std::shared_ptr<RecordBatch>> ReadStripe(int64_t stripe) {
liborc::RowReaderOptions opts;
RETURN_NOT_OK(SelectStripe(&opts, stripe));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
+ return ReadBatch(opts, schema, stripes_[stripe].num_rows);
}
- Status ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatch>* out) {
+ Result<std::shared_ptr<RecordBatch>> ReadStripe(
+ int64_t stripe, const std::vector<int>& include_indices) {
liborc::RowReaderOptions opts;
RETURN_NOT_OK(SelectIndices(&opts, include_indices));
RETURN_NOT_OK(SelectStripe(&opts, stripe));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
+ return ReadBatch(opts, schema, stripes_[stripe].num_rows);
}
- Status ReadStripe(int64_t stripe, const std::vector<std::string>&
include_names,
- std::shared_ptr<RecordBatch>* out) {
+ Result<std::shared_ptr<RecordBatch>> ReadStripe(
+ int64_t stripe, const std::vector<std::string>& include_names) {
liborc::RowReaderOptions opts;
RETURN_NOT_OK(SelectNames(&opts, include_names));
RETURN_NOT_OK(SelectStripe(&opts, stripe));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
- return ReadBatch(opts, schema, stripes_[stripe].num_rows, out);
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
+ return ReadBatch(opts, schema, stripes_[stripe].num_rows);
}
Status SelectStripe(liborc::RowReaderOptions* opts, int64_t stripe) {
@@ -429,20 +421,21 @@ class ORCFileReader::Impl {
return Status::OK();
}
- Status ReadTable(const liborc::RowReaderOptions& row_opts,
- const std::shared_ptr<Schema>& schema,
std::shared_ptr<Table>* out) {
+ Result<std::shared_ptr<Table>> ReadTable(const liborc::RowReaderOptions&
row_opts,
+ const std::shared_ptr<Schema>&
schema) {
liborc::RowReaderOptions opts(row_opts);
std::vector<std::shared_ptr<RecordBatch>> batches(stripes_.size());
for (size_t stripe = 0; stripe < stripes_.size(); stripe++) {
opts.range(stripes_[stripe].offset, stripes_[stripe].length);
- RETURN_NOT_OK(ReadBatch(opts, schema, stripes_[stripe].num_rows,
&batches[stripe]));
+ ARROW_ASSIGN_OR_RAISE(batches[stripe],
+ ReadBatch(opts, schema,
stripes_[stripe].num_rows));
}
- return Table::FromRecordBatches(schema, std::move(batches)).Value(out);
+ return Table::FromRecordBatches(schema, std::move(batches));
}
- Status ReadBatch(const liborc::RowReaderOptions& opts,
- const std::shared_ptr<Schema>& schema, int64_t nrows,
- std::shared_ptr<RecordBatch>* out) {
+ Result<std::shared_ptr<RecordBatch>> ReadBatch(const
liborc::RowReaderOptions& opts,
+ const
std::shared_ptr<Schema>& schema,
+ int64_t nrows) {
std::unique_ptr<liborc::RowReader> row_reader;
std::unique_ptr<liborc::ColumnVectorBatch> batch;
@@ -464,8 +457,9 @@ class ORCFileReader::Impl {
batch->numElements, builder->GetField(i)));
}
}
- RETURN_NOT_OK(builder->Flush(out));
- return Status::OK();
+ std::shared_ptr<RecordBatch> out;
+ RETURN_NOT_OK(builder->Flush(&out));
+ return out;
}
Status Seek(int64_t row_number) {
@@ -476,11 +470,10 @@ class ORCFileReader::Impl {
return Status::OK();
}
- Status NextStripeReader(int64_t batch_size, const std::vector<int>&
include_indices,
- std::shared_ptr<RecordBatchReader>* out) {
+ Result<std::shared_ptr<RecordBatchReader>> NextStripeReader(
+ int64_t batch_size, const std::vector<int>& include_indices) {
if (current_row_ >= NumberOfRows()) {
- out->reset();
- return Status::OK();
+ return nullptr;
}
liborc::RowReaderOptions opts;
@@ -489,8 +482,7 @@ class ORCFileReader::Impl {
}
StripeInformation stripe_info({0, 0, 0, 0});
RETURN_NOT_OK(SelectStripeWithRowNumber(&opts, current_row_,
&stripe_info));
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
std::unique_ptr<liborc::RowReader> row_reader;
ORC_BEGIN_CATCH_NOT_OK
@@ -499,9 +491,8 @@ class ORCFileReader::Impl {
current_row_ = stripe_info.first_row_of_stripe + stripe_info.num_rows;
ORC_END_CATCH_NOT_OK
- *out = std::make_shared<OrcStripeReader>(std::move(row_reader), schema,
batch_size,
+ return std::make_shared<OrcStripeReader>(std::move(row_reader), schema,
batch_size,
pool_);
- return Status::OK();
}
Result<std::shared_ptr<RecordBatchReader>> GetRecordBatchReader(
@@ -510,8 +501,7 @@ class ORCFileReader::Impl {
if (!include_names.empty()) {
RETURN_NOT_OK(SelectNames(&opts, include_names));
}
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(ReadSchema(opts, &schema));
+ ARROW_ASSIGN_OR_RAISE(auto schema, ReadSchema(opts));
std::unique_ptr<liborc::RowReader> row_reader;
ORC_BEGIN_CATCH_NOT_OK
@@ -522,9 +512,9 @@ class ORCFileReader::Impl {
pool_);
}
- Status NextStripeReader(int64_t batch_size,
std::shared_ptr<RecordBatchReader>* out) {
+ Result<std::shared_ptr<RecordBatchReader>> NextStripeReader(int64_t
batch_size) {
std::vector<int> empty_vec;
- return NextStripeReader(batch_size, empty_vec, out);
+ return NextStripeReader(batch_size, empty_vec);
}
private:
@@ -538,11 +528,6 @@ ORCFileReader::ORCFileReader() { impl_.reset(new ORCFileReader::Impl()); }
ORCFileReader::~ORCFileReader() {}
-Status ORCFileReader::Open(const std::shared_ptr<io::RandomAccessFile>& file,
- MemoryPool* pool, std::unique_ptr<ORCFileReader>*
reader) {
- return Open(file, pool).Value(reader);
-}
-
Result<std::unique_ptr<ORCFileReader>> ORCFileReader::Open(
const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool) {
auto result = std::unique_ptr<ORCFileReader>(new ORCFileReader());
@@ -554,115 +539,51 @@ Result<std::shared_ptr<const KeyValueMetadata>> ORCFileReader::ReadMetadata() {
return impl_->ReadMetadata();
}
-Status ORCFileReader::ReadSchema(std::shared_ptr<Schema>* out) {
- return impl_->ReadSchema(out);
-}
-
Result<std::shared_ptr<Schema>> ORCFileReader::ReadSchema() {
- std::shared_ptr<Schema> schema;
- RETURN_NOT_OK(impl_->ReadSchema(&schema));
- return schema;
+ return impl_->ReadSchema();
}
-Status ORCFileReader::Read(std::shared_ptr<Table>* out) { return
impl_->Read(out); }
-
-Result<std::shared_ptr<Table>> ORCFileReader::Read() {
- std::shared_ptr<Table> table;
- RETURN_NOT_OK(impl_->Read(&table));
- return table;
-}
-
-Status ORCFileReader::Read(const std::shared_ptr<Schema>& schema,
- std::shared_ptr<Table>* out) {
- return impl_->Read(schema, out);
-}
+Result<std::shared_ptr<Table>> ORCFileReader::Read() { return impl_->Read(); }
Result<std::shared_ptr<Table>> ORCFileReader::Read(
const std::shared_ptr<Schema>& schema) {
- std::shared_ptr<Table> table;
- RETURN_NOT_OK(impl_->Read(schema, &table));
- return table;
-}
-
-Status ORCFileReader::Read(const std::vector<int>& include_indices,
- std::shared_ptr<Table>* out) {
- return impl_->Read(include_indices, out);
+ return impl_->Read(schema);
}
Result<std::shared_ptr<Table>> ORCFileReader::Read(
const std::vector<int>& include_indices) {
- std::shared_ptr<Table> table;
- RETURN_NOT_OK(impl_->Read(include_indices, &table));
- return table;
+ return impl_->Read(include_indices);
}
Result<std::shared_ptr<Table>> ORCFileReader::Read(
const std::vector<std::string>& include_names) {
- std::shared_ptr<Table> table;
- RETURN_NOT_OK(impl_->Read(include_names, &table));
- return table;
-}
-
-Status ORCFileReader::Read(const std::shared_ptr<Schema>& schema,
- const std::vector<int>& include_indices,
- std::shared_ptr<Table>* out) {
- return impl_->Read(schema, include_indices, out);
+ return impl_->Read(include_names);
}
Result<std::shared_ptr<Table>> ORCFileReader::Read(
const std::shared_ptr<Schema>& schema, const std::vector<int>&
include_indices) {
- std::shared_ptr<Table> table;
- RETURN_NOT_OK(impl_->Read(schema, include_indices, &table));
- return table;
-}
-
-Status ORCFileReader::ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>*
out) {
- return impl_->ReadStripe(stripe, out);
+ return impl_->Read(schema, include_indices);
}
Result<std::shared_ptr<RecordBatch>> ORCFileReader::ReadStripe(int64_t stripe)
{
- std::shared_ptr<RecordBatch> recordBatch;
- RETURN_NOT_OK(impl_->ReadStripe(stripe, &recordBatch));
- return recordBatch;
-}
-
-Status ORCFileReader::ReadStripe(int64_t stripe, const std::vector<int>&
include_indices,
- std::shared_ptr<RecordBatch>* out) {
- return impl_->ReadStripe(stripe, include_indices, out);
+ return impl_->ReadStripe(stripe);
}
Result<std::shared_ptr<RecordBatch>> ORCFileReader::ReadStripe(
int64_t stripe, const std::vector<int>& include_indices) {
- std::shared_ptr<RecordBatch> recordBatch;
- RETURN_NOT_OK(impl_->ReadStripe(stripe, include_indices, &recordBatch));
- return recordBatch;
+ return impl_->ReadStripe(stripe, include_indices);
}
Result<std::shared_ptr<RecordBatch>> ORCFileReader::ReadStripe(
int64_t stripe, const std::vector<std::string>& include_names) {
- std::shared_ptr<RecordBatch> recordBatch;
- RETURN_NOT_OK(impl_->ReadStripe(stripe, include_names, &recordBatch));
- return recordBatch;
+ return impl_->ReadStripe(stripe, include_names);
}
Status ORCFileReader::Seek(int64_t row_number) { return
impl_->Seek(row_number); }
-Status ORCFileReader::NextStripeReader(int64_t batch_sizes,
- std::shared_ptr<RecordBatchReader>*
out) {
- return impl_->NextStripeReader(batch_sizes, out);
-}
-
Result<std::shared_ptr<RecordBatchReader>> ORCFileReader::NextStripeReader(
int64_t batch_size) {
- std::shared_ptr<RecordBatchReader> reader;
- RETURN_NOT_OK(impl_->NextStripeReader(batch_size, &reader));
- return reader;
-}
-
-Status ORCFileReader::NextStripeReader(int64_t batch_size,
- const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatchReader>*
out) {
- return impl_->NextStripeReader(batch_size, include_indices, out);
+ return impl_->NextStripeReader(batch_size);
}
Result<std::shared_ptr<RecordBatchReader>> ORCFileReader::GetRecordBatchReader(
@@ -672,9 +593,7 @@ Result<std::shared_ptr<RecordBatchReader>> ORCFileReader::GetRecordBatchReader(
Result<std::shared_ptr<RecordBatchReader>> ORCFileReader::NextStripeReader(
int64_t batch_size, const std::vector<int>& include_indices) {
- std::shared_ptr<RecordBatchReader> reader;
- RETURN_NOT_OK(impl_->NextStripeReader(batch_size, include_indices, &reader));
- return reader;
+ return impl_->NextStripeReader(batch_size, include_indices);
}
int64_t ORCFileReader::NumberOfStripes() { return impl_->NumberOfStripes(); }
diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h
index 02e2bc9843..59f63796bd 100644
--- a/cpp/src/arrow/adapters/orc/adapter.h
+++ b/cpp/src/arrow/adapters/orc/adapter.h
@@ -41,16 +41,6 @@ class ARROW_EXPORT ORCFileReader {
public:
~ORCFileReader();
- /// \brief Creates a new ORC reader.
- ///
- /// \param[in] file the data source
- /// \param[in] pool a MemoryPool to use for buffer allocations
- /// \param[out] reader the returned reader object
- /// \return Status
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
MemoryPool* pool,
- std::unique_ptr<ORCFileReader>* reader);
-
/// \brief Creates a new ORC reader
///
/// \param[in] file the data source
@@ -59,25 +49,11 @@ class ARROW_EXPORT ORCFileReader {
static Result<std::unique_ptr<ORCFileReader>> Open(
const std::shared_ptr<io::RandomAccessFile>& file, MemoryPool* pool);
- /// \brief Return the schema read from the ORC file
- ///
- /// \param[out] out the returned Schema object
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status ReadSchema(std::shared_ptr<Schema>* out);
-
/// \brief Return the schema read from the ORC file
///
/// \return the returned Schema object
Result<std::shared_ptr<Schema>> ReadSchema();
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[out] out the returned Table
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status Read(std::shared_ptr<Table>* out);
-
/// \brief Read the file as a Table
///
/// The table will be composed of one record batch per stripe.
@@ -85,15 +61,6 @@ class ARROW_EXPORT ORCFileReader {
/// \return the returned Table
Result<std::shared_ptr<Table>> Read();
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[in] schema the Table schema
- /// \param[out] out the returned Table
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status Read(const std::shared_ptr<Schema>& schema, std::shared_ptr<Table>*
out);
-
/// \brief Read the file as a Table
///
/// The table will be composed of one record batch per stripe.
@@ -102,15 +69,6 @@ class ARROW_EXPORT ORCFileReader {
/// \return the returned Table
Result<std::shared_ptr<Table>> Read(const std::shared_ptr<Schema>& schema);
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned Table
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status Read(const std::vector<int>& include_indices, std::shared_ptr<Table>*
out);
-
/// \brief Read the file as a Table
///
/// The table will be composed of one record batch per stripe.
@@ -127,17 +85,6 @@ class ARROW_EXPORT ORCFileReader {
/// \return the returned Table
Result<std::shared_ptr<Table>> Read(const std::vector<std::string>&
include_names);
- /// \brief Read the file as a Table
- ///
- /// The table will be composed of one record batch per stripe.
- ///
- /// \param[in] schema the Table schema
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned Table
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status Read(const std::shared_ptr<Schema>& schema,
- const std::vector<int>& include_indices, std::shared_ptr<Table>*
out);
-
/// \brief Read the file as a Table
///
/// The table will be composed of one record batch per stripe.
@@ -148,28 +95,12 @@ class ARROW_EXPORT ORCFileReader {
Result<std::shared_ptr<Table>> Read(const std::shared_ptr<Schema>& schema,
const std::vector<int>& include_indices);
- /// \brief Read a single stripe as a RecordBatch
- ///
- /// \param[in] stripe the stripe index
- /// \param[out] out the returned RecordBatch
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status ReadStripe(int64_t stripe, std::shared_ptr<RecordBatch>* out);
-
/// \brief Read a single stripe as a RecordBatch
///
/// \param[in] stripe the stripe index
/// \return the returned RecordBatch
Result<std::shared_ptr<RecordBatch>> ReadStripe(int64_t stripe);
- /// \brief Read a single stripe as a RecordBatch
- ///
- /// \param[in] stripe the stripe index
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned RecordBatch
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status ReadStripe(int64_t stripe, const std::vector<int>& include_indices,
- std::shared_ptr<RecordBatch>* out);
-
/// \brief Read a single stripe as a RecordBatch
///
/// \param[in] stripe the stripe index
@@ -192,20 +123,6 @@ class ARROW_EXPORT ORCFileReader {
/// \param[in] row_number the rows number to seek
Status Seek(int64_t row_number);
- /// \brief Get a stripe level record batch iterator.
- ///
- /// Each record batch will have up to `batch_size` rows.
- /// NextStripeReader serves as a fine grained alternative to ReadStripe
- /// which may cause OOM issues by loading the whole stripe into memory.
- ///
- /// Note this will only read rows for the current stripe, not the entire
- /// file.
- ///
- /// \param[in] batch_size the maximum number of rows in each record batch
- /// \param[out] out the returned stripe reader
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status NextStripeReader(int64_t batch_size,
std::shared_ptr<RecordBatchReader>* out);
-
/// \brief Get a stripe level record batch iterator.
///
/// Each record batch will have up to `batch_size` rows.
@@ -219,22 +136,6 @@ class ARROW_EXPORT ORCFileReader {
/// \return the returned stripe reader
Result<std::shared_ptr<RecordBatchReader>> NextStripeReader(int64_t
batch_size);
- /// \brief Get a stripe level record batch iterator.
- ///
- /// Each record batch will have up to `batch_size` rows.
- /// NextStripeReader serves as a fine grained alternative to ReadStripe
- /// which may cause OOM issues by loading the whole stripe into memory.
- ///
- /// Note this will only read rows for the current stripe, not the entire
- /// file.
- ///
- /// \param[in] batch_size the maximum number of rows in each record batch
- /// \param[in] include_indices the selected field indices to read
- /// \param[out] out the returned stripe reader
- ARROW_DEPRECATED("Deprecated in 6.0.0. Use Result-returning overload
instead.")
- Status NextStripeReader(int64_t batch_size, const std::vector<int>&
include_indices,
- std::shared_ptr<RecordBatchReader>* out);
-
/// \brief Get a stripe level record batch iterator.
///
/// Each record batch will have up to `batch_size` rows.
diff --git a/cpp/src/arrow/adapters/orc/util.cc b/cpp/src/arrow/adapters/orc/util.cc
index 14931752bf..5a8ae93532 100644
--- a/cpp/src/arrow/adapters/orc/util.cc
+++ b/cpp/src/arrow/adapters/orc/util.cc
@@ -945,62 +945,49 @@ Status WriteBatch(const ChunkedArray& chunked_array, int64_t length,
return Status::OK();
}
-Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
+Result<std::shared_ptr<DataType>> GetArrowType(const liborc::Type* type) {
// When subselecting fields on read, liborc will set some nodes to nullptr,
// so we need to check for nullptr before progressing
if (type == nullptr) {
- *out = null();
- return Status::OK();
+ return null();
}
liborc::TypeKind kind = type->getKind();
const int subtype_count = static_cast<int>(type->getSubtypeCount());
switch (kind) {
case liborc::BOOLEAN:
- *out = boolean();
- break;
+ return boolean();
case liborc::BYTE:
- *out = int8();
- break;
+ return int8();
case liborc::SHORT:
- *out = int16();
- break;
+ return int16();
case liborc::INT:
- *out = int32();
- break;
+ return int32();
case liborc::LONG:
- *out = int64();
- break;
+ return int64();
case liborc::FLOAT:
- *out = float32();
- break;
+ return float32();
case liborc::DOUBLE:
- *out = float64();
- break;
+ return float64();
case liborc::VARCHAR:
case liborc::STRING:
- *out = utf8();
- break;
+ return utf8();
case liborc::BINARY:
- *out = binary();
- break;
+ return binary();
case liborc::CHAR:
- *out = fixed_size_binary(static_cast<int>(type->getMaximumLength()));
- break;
+ return fixed_size_binary(static_cast<int>(type->getMaximumLength()));
case liborc::TIMESTAMP:
- *out = timestamp(TimeUnit::NANO);
- break;
+ return timestamp(TimeUnit::NANO);
case liborc::DATE:
- *out = date32();
- break;
+ return date32();
case liborc::DECIMAL: {
const int precision = static_cast<int>(type->getPrecision());
const int scale = static_cast<int>(type->getScale());
if (precision == 0) {
// In HIVE 0.11/0.12 precision is set as 0, but means max precision
- *out = decimal128(38, 6);
+ return decimal128(38, 6);
} else {
- *out = decimal128(precision, scale);
+ return decimal128(precision, scale);
}
break;
}
@@ -1008,60 +995,48 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
if (subtype_count != 1) {
return Status::TypeError("Invalid Orc List type");
}
- std::shared_ptr<DataType> elemtype;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &elemtype));
- *out = list(elemtype);
- break;
+ ARROW_ASSIGN_OR_RAISE(auto elemtype, GetArrowType(type->getSubtype(0)));
+ return list(std::move(elemtype));
}
case liborc::MAP: {
if (subtype_count != 2) {
return Status::TypeError("Invalid Orc Map type");
}
- std::shared_ptr<DataType> key_type, item_type;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(0), &key_type));
- RETURN_NOT_OK(GetArrowType(type->getSubtype(1), &item_type));
- *out = map(key_type, item_type);
- break;
+ ARROW_ASSIGN_OR_RAISE(auto key_type, GetArrowType(type->getSubtype(0)));
+ ARROW_ASSIGN_OR_RAISE(auto item_type, GetArrowType(type->getSubtype(1)));
+ return map(std::move(key_type), std::move(item_type));
}
case liborc::STRUCT: {
- std::vector<std::shared_ptr<Field>> fields;
+ FieldVector fields(subtype_count);
for (int child = 0; child < subtype_count; ++child) {
- std::shared_ptr<DataType> elem_type;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
+ ARROW_ASSIGN_OR_RAISE(auto elem_type,
GetArrowType(type->getSubtype(child)));
std::string name = type->getFieldName(child);
- fields.push_back(field(name, elem_type));
+ fields[child] = field(std::move(name), std::move(elem_type));
}
- *out = struct_(fields);
- break;
+ return struct_(std::move(fields));
}
case liborc::UNION: {
- std::vector<std::shared_ptr<Field>> fields;
- std::vector<int8_t> type_codes;
+ FieldVector fields(subtype_count);
+ std::vector<int8_t> type_codes(subtype_count);
for (int child = 0; child < subtype_count; ++child) {
- std::shared_ptr<DataType> elem_type;
- RETURN_NOT_OK(GetArrowType(type->getSubtype(child), &elem_type));
- fields.push_back(field("_union_" + std::to_string(child), elem_type));
- type_codes.push_back(static_cast<int8_t>(child));
+ ARROW_ASSIGN_OR_RAISE(auto elem_type,
GetArrowType(type->getSubtype(child)));
+ fields[child] = field("_union_" + std::to_string(child),
std::move(elem_type));
+ type_codes[child] = static_cast<int8_t>(child);
}
- *out = sparse_union(fields, type_codes);
- break;
+ return sparse_union(std::move(fields), std::move(type_codes));
}
- default: {
+ default:
return Status::TypeError("Unknown Orc type kind: ", type->toString());
- }
}
- return Status::OK();
}
Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema) {
int numFields = schema.num_fields();
ORC_UNIQUE_PTR<liborc::Type> out_type = liborc::createStructType();
for (int i = 0; i < numFields; i++) {
- std::shared_ptr<Field> field = schema.field(i);
- std::string field_name = field->name();
- std::shared_ptr<DataType> arrow_child_type = field->type();
- ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*arrow_child_type));
- out_type->addStructField(field_name, std::move(orc_subtype));
+ const auto& field = schema.field(i);
+ ARROW_ASSIGN_OR_RAISE(auto orc_subtype, GetOrcType(*field->type()));
+ out_type->addStructField(field->name(), std::move(orc_subtype));
}
return std::move(out_type);
}
diff --git a/cpp/src/arrow/adapters/orc/util.h b/cpp/src/arrow/adapters/orc/util.h
index 3e6d0fcc66..515de447a6 100644
--- a/cpp/src/arrow/adapters/orc/util.h
+++ b/cpp/src/arrow/adapters/orc/util.h
@@ -21,18 +21,17 @@
#include <memory>
#include "arrow/array/builder_base.h"
+#include "arrow/result.h"
#include "arrow/status.h"
#include "orc/OrcFile.hh"
namespace liborc = orc;
namespace arrow {
-
namespace adapters {
-
namespace orc {
-Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out);
+Result<std::shared_ptr<DataType>> GetArrowType(const liborc::Type* type);
Result<ORC_UNIQUE_PTR<liborc::Type>> GetOrcType(const Schema& schema);