This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new caf1c6022c feat(array): add `RecordBatchStream` trait (#9166)
caf1c6022c is described below

commit caf1c6022c71af00ef712e9e865acfee74169f0d
Author: David Li <[email protected]>
AuthorDate: Thu Jan 29 07:40:58 2026 +0900

    feat(array): add `RecordBatchStream` trait (#9166)
    
    # Which issue does this PR close?
    
    Closes #7228.
    
    # Rationale for this change
    
    Provide a common trait definition for people in the async ecosystem.
    
    # What changes are included in this PR?
    
    An equivalent for the RecordBatchReader trait using futures::Stream.
    
    # Are these changes tested?
    
    WIP
    
    # Are there any user-facing changes?
    
    Yes.
---
 arrow-array/Cargo.toml         |  2 ++
 arrow-array/src/lib.rs         |  2 ++
 arrow-array/src/nonblocking.rs | 31 +++++++++++++++++++++++++++++++
 arrow/Cargo.toml               |  1 +
 arrow/README.md                |  1 +
 5 files changed, 37 insertions(+)

diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml
index 8ab0bb290e..0a1d6fde91 100644
--- a/arrow-array/Cargo.toml
+++ b/arrow-array/Cargo.toml
@@ -44,6 +44,7 @@ arrow-schema = { workspace = true }
 arrow-data = { workspace = true }
 chrono = { workspace = true }
 chrono-tz = { version = "0.10", optional = true }
+futures = { version = "0.3", optional = true }
 num-complex = { version = "0.4.6", default-features = false, features = 
["std"] }
 num-integer = { version = "0.1.46", default-features = false, features = 
["std"] }
 num-traits = { version = "0.2.19", default-features = false, features = 
["std"] }
@@ -54,6 +55,7 @@ hashbrown = { version = "0.16.0", default-features = false }
 all-features = true
 
 [features]
+async = ["dep:futures"]
 ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
 force_validate = []
 
diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index 86c1c6550c..a5f9bf5e71 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -254,6 +254,8 @@ pub mod ffi;
 #[cfg(feature = "ffi")]
 pub mod ffi_stream;
 pub mod iterator;
+#[cfg(feature = "async")]
+pub mod nonblocking;
 pub mod run_iterator;
 pub mod temporal_conversions;
 pub mod timezone;
diff --git a/arrow-array/src/nonblocking.rs b/arrow-array/src/nonblocking.rs
new file mode 100644
index 0000000000..87c317c083
--- /dev/null
+++ b/arrow-array/src/nonblocking.rs
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Trait definitions specific to using Arrow with async.
+
+use arrow_schema::{ArrowError, SchemaRef};
+
+use crate::record_batch::RecordBatch;
+
+/// Trait for an asynchronous stream of `RecordBatch`es: a `futures::Stream` of `Result<RecordBatch, ArrowError>` items, the async equivalent of `RecordBatchReader`.
+pub trait RecordBatchStream: futures::Stream<Item = Result<RecordBatch, 
ArrowError>> {
+    /// Return the schema of this `RecordBatchStream`.
+    ///
+    /// Implementations of this trait should guarantee that all `RecordBatch`es 
returned by this
+    /// stream have the same schema as the one returned from this method.
+    fn schema(&self) -> &SchemaRef;
+}
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 0c5a925ae3..fbb52318d3 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -62,6 +62,7 @@ all-features = true
 
 [features]
 default = ["csv", "ipc", "json"]
+async = ["arrow-array/async"]
 ipc_compression = ["ipc", "arrow-ipc/lz4", "arrow-ipc/zstd"]
 csv = ["arrow-csv"]
 ipc = ["arrow-ipc"]
diff --git a/arrow/README.md b/arrow/README.md
index 7397db8c8a..7c55932d2f 100644
--- a/arrow/README.md
+++ b/arrow/README.md
@@ -70,6 +70,7 @@ The `arrow` crate provides the following features which may 
be enabled in your `
 - `ffi` - bindings for the Arrow C [C Data 
Interface](https://arrow.apache.org/docs/format/CDataInterface.html)
 - `pyarrow` - bindings for pyo3 to call arrow-rs from python
 - `canonical_extension_types` - definitions for [canonical extension 
types](https://arrow.apache.org/docs/format/CanonicalExtensions.html#format-canonical-extensions)
+- `async` - definitions for traits using `async`, intended to work with the 
async ecosystem
 
 ## Arrow Feature Status
 

Reply via email to