This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ae9b0dbaa8e Move ffi stream and utils from arrow to arrow-array (#5670)
ae9b0dbaa8e is described below
commit ae9b0dbaa8e201a68c6bdec1632c69e6308f27c9
Author: Alexandre Crayssac <[email protected]>
AuthorDate: Mon Apr 22 19:34:28 2024 +0200
Move ffi stream and utils from arrow to arrow-array (#5670)
* Move ffi stream and utils from arrow to arrow-array
* Fix CI
* Remove TODOs
---
arrow-array/Cargo.toml | 3 +
{arrow => arrow-array}/src/ffi.rs | 363 +++++++++++++++++++++++--------
{arrow => arrow-array}/src/ffi_stream.rs | 15 +-
arrow-array/src/lib.rs | 4 +
arrow/Cargo.toml | 2 +-
arrow/src/array/ffi.rs | 254 ---------------------
arrow/src/array/mod.rs | 5 +-
arrow/src/lib.rs | 4 +-
8 files changed, 289 insertions(+), 361 deletions(-)
diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml
index 0bc2facb537..b00d2c88e1a 100644
--- a/arrow-array/Cargo.toml
+++ b/arrow-array/Cargo.toml
@@ -50,6 +50,9 @@ num = { version = "0.4.1", default-features = false, features = ["std"] }
half = { version = "2.1", default-features = false, features = ["num-traits"] }
hashbrown = { version = "0.14", default-features = false }
+[features]
+ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
+
[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
criterion = { version = "0.5", default-features = false }
diff --git a/arrow/src/ffi.rs b/arrow-array/src/ffi.rs
similarity index 81%
rename from arrow/src/ffi.rs
rename to arrow-array/src/ffi.rs
index d33de9d655f..7b988bb0747 100644
--- a/arrow/src/ffi.rs
+++ b/arrow-array/src/ffi.rs
@@ -29,11 +29,11 @@
//!
//! ```rust
//! # use std::sync::Arc;
-//! # use arrow::array::{Int32Array, Array, ArrayData, make_array};
-//! # use arrow::error::Result;
-//! # use arrow_arith::numeric::add;
-//! # use arrow::ffi::{to_ffi, from_ffi};
-//! # fn main() -> Result<()> {
+//! # use arrow_array::{Int32Array, Array, make_array};
+//! # use arrow_data::ArrayData;
+//! # use arrow_array::ffi::{to_ffi, from_ffi};
+//! # use arrow_schema::ArrowError;
+//! # fn main() -> Result<(), ArrowError> {
//! // create an array natively
//!
//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
@@ -46,11 +46,8 @@
//! let data = unsafe { from_ffi(out_array, &out_schema) }?;
//! let array = Int32Array::from(data);
//!
-//! // perform some operation
-//! let array = add(&array, &array)?;
-//!
//! // verify
-//! assert_eq!(array.as_ref(), &Int32Array::from(vec![Some(2), None, Some(6)]));
+//! assert_eq!(array, Int32Array::from(vec![Some(1), None, Some(3)]));
//! #
//! # Ok(())
//! # }
@@ -60,9 +57,9 @@
//!
//! ```
//! # use std::ptr::addr_of_mut;
-//! # use arrow::ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema};
+//! # use arrow_array::ffi::{from_ffi, FFI_ArrowArray};
//! # use arrow_array::{ArrayRef, make_array};
-//! # use arrow_schema::ArrowError;
+//! # use arrow_schema::{ArrowError, ffi::FFI_ArrowSchema};
//! #
//! /// A foreign data container that can export to C Data interface
//! struct ForeignArray {};
@@ -106,16 +103,39 @@ To export an array, create an `ArrowArray` using [ArrowArray::try_new].
use std::{mem::size_of, ptr::NonNull, sync::Arc};
+use arrow_buffer::{bit_util, Buffer, MutableBuffer};
pub use arrow_data::ffi::FFI_ArrowArray;
-pub use arrow_schema::ffi::{FFI_ArrowSchema, Flags};
+use arrow_data::{layout, ArrayData};
+pub use arrow_schema::ffi::FFI_ArrowSchema;
+use arrow_schema::{ArrowError, DataType, UnionMode};
+
+use crate::array::ArrayRef;
+
+type Result<T> = std::result::Result<T, ArrowError>;
-use arrow_schema::UnionMode;
+/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
+/// # Safety
+/// Assumes that these pointers represent valid C Data Interfaces, both in memory
+/// representation and lifetime via the `release` mechanism.
+///
+/// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and
+/// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
+/// Usually the raw pointers are provided by the array data consumer.
+#[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")]
+pub unsafe fn export_array_into_raw(
+ src: ArrayRef,
+ out_array: *mut FFI_ArrowArray,
+ out_schema: *mut FFI_ArrowSchema,
+) -> Result<()> {
+ let data = src.to_data();
+ let array = FFI_ArrowArray::new(&data);
+ let schema = FFI_ArrowSchema::try_from(data.data_type())?;
-use crate::array::{layout, ArrayData};
-use crate::buffer::{Buffer, MutableBuffer};
-use crate::datatypes::DataType;
-use crate::error::{ArrowError, Result};
-use crate::util::bit_util;
+ std::ptr::write_unaligned(out_array, array);
+ std::ptr::write_unaligned(out_schema, schema);
+
+ Ok(())
+}
// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
// This is set by the Arrow specification
@@ -464,19 +484,17 @@ impl<'a> ImportedArrowArray<'a> {
}
#[cfg(test)]
-mod tests {
+mod tests_to_then_from_ffi {
use std::collections::HashMap;
use std::mem::ManuallyDrop;
- use std::ptr::addr_of_mut;
- use arrow_array::builder::UnionBuilder;
- use arrow_array::cast::AsArray;
- use arrow_array::types::{Float64Type, Int32Type};
- use arrow_array::*;
use arrow_buffer::NullBuffer;
+ use arrow_schema::Field;
- use crate::compute::kernels;
- use crate::datatypes::{Field, Int8Type};
+ use crate::builder::UnionBuilder;
+ use crate::cast::AsArray;
+ use crate::types::{Float64Type, Int32Type, Int8Type};
+ use crate::*;
use super::*;
@@ -490,10 +508,9 @@ mod tests {
// (simulate consumer) import it
let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap());
- let array = kernels::numeric::add(&array, &array).unwrap();
// verify
- assert_eq!(array.as_ref(), &Int32Array::from(vec![2, 4, 6]));
+ assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
}
#[test]
@@ -535,15 +552,9 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
- assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
-
- let array = kernels::numeric::add(array, array).unwrap();
- // verify
- assert_eq!(array.as_ref(), &Int32Array::from(vec![Some(4), None]));
+ assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
// (drop/release)
Ok(())
@@ -589,21 +600,13 @@ mod tests {
let array = make_array(data);
// perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array = array
.as_any()
.downcast_ref::<GenericStringArray<Offset>>()
.unwrap();
// verify
- let expected = GenericStringArray::<Offset>::from(vec![
- Some("a"),
- None,
- Some("aaa"),
- Some("a"),
- None,
- Some("aaa"),
- ]);
+ let expected = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
assert_eq!(array, &expected);
// (drop/release)
@@ -694,23 +697,13 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array = array
.as_any()
.downcast_ref::<GenericBinaryArray<Offset>>()
.unwrap();
// verify
- let expected: Vec<Option<&[u8]>> = vec![
- Some(b"a"),
- None,
- Some(b"aaa"),
- Some(b"a"),
- None,
- Some(b"aaa"),
- ];
+ let expected: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
let expected = GenericBinaryArray::<Offset>::from(expected);
assert_eq!(array, &expected);
@@ -739,15 +732,12 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
- let array = kernels::boolean::not(array)?;
// verify
assert_eq!(
array,
- BooleanArray::from(vec![None, Some(false), Some(true)])
+ &BooleanArray::from(vec![None, Some(true), Some(false)])
);
// (drop/release)
@@ -765,9 +755,6 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array = array
.as_any()
.downcast_ref::<Time32MillisecondArray>()
@@ -776,7 +763,7 @@ mod tests {
// verify
assert_eq!(
array,
- &Time32MillisecondArray::from(vec![None, Some(1), Some(2), None,
Some(1), Some(2)])
+ &Time32MillisecondArray::from(vec![None, Some(1), Some(2)])
);
// (drop/release)
@@ -794,9 +781,6 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array = array
.as_any()
.downcast_ref::<TimestampMillisecondArray>()
@@ -805,7 +789,7 @@ mod tests {
// verify
assert_eq!(
array,
- &TimestampMillisecondArray::from(vec![None, Some(1), Some(2),
None, Some(1), Some(2)])
+ &TimestampMillisecondArray::from(vec![None, Some(1), Some(2)])
);
// (drop/release)
@@ -830,9 +814,6 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array = array
.as_any()
.downcast_ref::<FixedSizeBinaryArray>()
@@ -849,12 +830,6 @@ mod tests {
Some(vec![20, 20, 20]),
Some(vec![30, 30, 30]),
None,
- None,
- Some(vec![10, 10, 10]),
- None,
- Some(vec![20, 20, 20]),
- Some(vec![30, 30, 30]),
- None,
]
.into_iter(),
3
@@ -891,9 +866,6 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array =
array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
// 0010 0100
@@ -903,15 +875,14 @@ mod tests {
let mut w = vec![];
w.extend_from_slice(&v);
- w.extend_from_slice(&v);
let expected_value_data = ArrayData::builder(DataType::Int32)
- .len(18)
+ .len(9)
.add_buffer(Buffer::from_slice_ref(&w))
.build()?;
let expected_list_data = ArrayData::builder(list_data_type)
- .len(6)
+ .len(3)
.null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
.add_child_data(expected_value_data)
.build()?;
@@ -936,16 +907,13 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let actual = array
.as_any()
.downcast_ref::<DictionaryArray<Int8Type>>()
.unwrap();
// verify
- let new_values = vec!["a", "aaa", "aaa", "a", "aaa", "aaa"];
+ let new_values = vec!["a", "aaa", "aaa"];
let expected: DictionaryArray<Int8Type> =
new_values.into_iter().collect();
assert_eq!(actual, &expected);
@@ -956,7 +924,6 @@ mod tests {
#[test]
#[allow(deprecated)]
fn test_export_array_into_raw() -> Result<()> {
- use crate::array::export_array_into_raw;
let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
// Assume two raw pointers provided by the consumer
@@ -964,8 +931,8 @@ mod tests {
let mut out_schema = FFI_ArrowSchema::empty();
{
- let out_array_ptr = addr_of_mut!(out_array);
- let out_schema_ptr = addr_of_mut!(out_schema);
+ let out_array_ptr = std::ptr::addr_of_mut!(out_array);
+ let out_schema_ptr = std::ptr::addr_of_mut!(out_schema);
unsafe {
export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
}
@@ -977,10 +944,9 @@ mod tests {
// perform some operation
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
- let array = kernels::numeric::add(array, array).unwrap();
// verify
- assert_eq!(array.as_ref(), &Int32Array::from(vec![2, 4, 6]));
+ assert_eq!(array, &Int32Array::from(vec![1, 2, 3]));
Ok(())
}
@@ -995,9 +961,6 @@ mod tests {
// (simulate consumer) import it
let data = unsafe { from_ffi(array, &schema) }?;
let array = make_array(data);
-
- // perform some operation
- let array = kernels::concat::concat(&[array.as_ref(),
array.as_ref()]).unwrap();
let array = array
.as_any()
.downcast_ref::<DurationSecondArray>()
@@ -1006,7 +969,7 @@ mod tests {
// verify
assert_eq!(
array,
- &DurationSecondArray::from(vec![None, Some(1), Some(2), None,
Some(1), Some(2)])
+ &DurationSecondArray::from(vec![None, Some(1), Some(2)])
);
// (drop/release)
@@ -1248,3 +1211,213 @@ mod tests {
Ok(())
}
}
+
+#[cfg(test)]
+mod tests_from_ffi {
+ use std::sync::Arc;
+
+ use arrow_buffer::{bit_util, buffer::Buffer};
+ use arrow_data::ArrayData;
+ use arrow_schema::{DataType, Field};
+
+ use crate::{
+ array::{
+ Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
+ Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
+ },
+ ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
+ };
+
+ use super::Result;
+
+ fn test_round_trip(expected: &ArrayData) -> Result<()> {
+ // here we export the array
+ let array = FFI_ArrowArray::new(expected);
+ let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
+
+ // simulate an external consumer by being the consumer
+ let result = &unsafe { from_ffi(array, &schema) }?;
+
+ assert_eq!(result, expected);
+ Ok(())
+ }
+
+ #[test]
+ fn test_u32() -> Result<()> {
+ let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_u64() -> Result<()> {
+ let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_i64() -> Result<()> {
+ let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_struct() -> Result<()> {
+ let inner = StructArray::from(vec![
+ (
+ Arc::new(Field::new("a1", DataType::Boolean, false)),
+ Arc::new(BooleanArray::from(vec![true, true, false, false]))
as Arc<dyn Array>,
+ ),
+ (
+ Arc::new(Field::new("a2", DataType::UInt32, false)),
+ Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
+ ),
+ ]);
+
+ let array = StructArray::from(vec![
+ (
+ Arc::new(Field::new("a", inner.data_type().clone(), false)),
+ Arc::new(inner) as Arc<dyn Array>,
+ ),
+ (
+ Arc::new(Field::new("b", DataType::Boolean, false)),
+ Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
+ ),
+ (
+ Arc::new(Field::new("c", DataType::UInt32, false)),
+ Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
+ ),
+ ]);
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_dictionary() -> Result<()> {
+ let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
+ let keys = Int32Array::from(vec![
+ Some(0),
+ Some(1),
+ None,
+ Some(1),
+ Some(1),
+ None,
+ Some(1),
+ Some(2),
+ Some(1),
+ None,
+ ]);
+ let array = DictionaryArray::new(keys, Arc::new(values));
+
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_fixed_size_binary() -> Result<()> {
+ let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30,
30]];
+ let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;
+
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_fixed_size_binary_with_nulls() -> Result<()> {
+ let values = vec![
+ None,
+ Some(vec![10, 10, 10]),
+ None,
+ Some(vec![20, 20, 20]),
+ Some(vec![30, 30, 30]),
+ None,
+ ];
+ let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
+
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_fixed_size_list() -> Result<()> {
+ let v: Vec<i64> = (0..9).collect();
+ let value_data = ArrayData::builder(DataType::Int64)
+ .len(9)
+ .add_buffer(Buffer::from_slice_ref(v))
+ .build()?;
+ let list_data_type =
+ DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64,
false)), 3);
+ let list_data = ArrayData::builder(list_data_type)
+ .len(3)
+ .add_child_data(value_data)
+ .build()?;
+ let array = FixedSizeListArray::from(list_data);
+
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_fixed_size_list_with_nulls() -> Result<()> {
+ // 0100 0110
+ let mut validity_bits: [u8; 1] = [0; 1];
+ bit_util::set_bit(&mut validity_bits, 1);
+ bit_util::set_bit(&mut validity_bits, 2);
+ bit_util::set_bit(&mut validity_bits, 6);
+
+ let v: Vec<i16> = (0..16).collect();
+ let value_data = ArrayData::builder(DataType::Int16)
+ .len(16)
+ .add_buffer(Buffer::from_slice_ref(v))
+ .build()?;
+ let list_data_type =
+ DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16,
false)), 2);
+ let list_data = ArrayData::builder(list_data_type)
+ .len(8)
+ .null_bit_buffer(Some(Buffer::from(validity_bits)))
+ .add_child_data(value_data)
+ .build()?;
+ let array = FixedSizeListArray::from(list_data);
+
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+
+ #[test]
+ fn test_fixed_size_list_nested() -> Result<()> {
+ let v: Vec<i32> = (0..16).collect();
+ let value_data = ArrayData::builder(DataType::Int32)
+ .len(16)
+ .add_buffer(Buffer::from_slice_ref(v))
+ .build()?;
+
+ let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
+ let value_offsets = Buffer::from_slice_ref(offsets);
+ let inner_list_data_type =
+ DataType::List(Arc::new(Field::new("item", DataType::Int32,
false)));
+ let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
+ .len(8)
+ .add_buffer(value_offsets)
+ .add_child_data(value_data)
+ .build()?;
+
+ // 0000 0100
+ let mut validity_bits: [u8; 1] = [0; 1];
+ bit_util::set_bit(&mut validity_bits, 2);
+
+ let list_data_type =
+ DataType::FixedSizeList(Arc::new(Field::new("f",
inner_list_data_type, false)), 2);
+ let list_data = ArrayData::builder(list_data_type)
+ .len(4)
+ .null_bit_buffer(Some(Buffer::from(validity_bits)))
+ .add_child_data(inner_list_data)
+ .build()?;
+
+ let array = FixedSizeListArray::from(list_data);
+
+ let data = array.into_data();
+ test_round_trip(&data)
+ }
+}
diff --git a/arrow/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs
similarity index 98%
rename from arrow/src/ffi_stream.rs
rename to arrow-array/src/ffi_stream.rs
index 15b88ef3216..6f3405ead7b 100644
--- a/arrow/src/ffi_stream.rs
+++ b/arrow-array/src/ffi_stream.rs
@@ -63,14 +63,16 @@ use std::{
sync::Arc,
};
+use arrow_data::ffi::FFI_ArrowArray;
+use arrow_schema::{ffi::FFI_ArrowSchema, ArrowError, Schema, SchemaRef};
+
use crate::array::Array;
use crate::array::StructArray;
-use crate::datatypes::{Schema, SchemaRef};
-use crate::error::ArrowError;
-use crate::error::Result;
-use crate::ffi::*;
+use crate::ffi::from_ffi_and_data_type;
use crate::record_batch::{RecordBatch, RecordBatchReader};
+type Result<T> = std::result::Result<T, ArrowError>;
+
const ENOMEM: i32 = 12;
const EIO: i32 = 5;
const EINVAL: i32 = 22;
@@ -81,6 +83,7 @@ const ENOSYS: i32 = 78;
/// This was created by bindgen
#[repr(C)]
#[derive(Debug)]
+#[allow(missing_docs)]
pub struct FFI_ArrowArrayStream {
pub get_schema: Option<
unsafe extern "C" fn(arg1: *mut FFI_ArrowArrayStream, out: *mut FFI_ArrowSchema) -> c_int,
@@ -393,8 +396,10 @@ pub unsafe fn export_reader_into_raw(
mod tests {
use super::*;
+ use arrow_schema::Field;
+
use crate::array::Int32Array;
- use crate::datatypes::Field;
+ use crate::ffi::from_ffi;
struct TestRecordBatchReader {
schema: SchemaRef,
diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index ef98c5efefb..90bc5e31205 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -197,6 +197,10 @@ pub use scalar::*;
pub mod builder;
pub mod cast;
mod delta;
+#[cfg(feature = "ffi")]
+pub mod ffi;
+#[cfg(feature = "ffi")]
+pub mod ffi_stream;
pub mod iterator;
pub mod run_iterator;
pub mod temporal_conversions;
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index a938d75b1a6..9d3c431b304 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -77,7 +77,7 @@ pyarrow = ["pyo3", "ffi"]
# but is run as part of our CI checks
force_validate = ["arrow-data/force_validate"]
# Enable ffi support
-ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
+ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"]
chrono-tz = ["arrow-array/chrono-tz"]
[dev-dependencies]
diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
deleted file mode 100644
index 43f54a03842..00000000000
--- a/arrow/src/array/ffi.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Contains functionality to load an ArrayData from the C Data Interface
-
-use crate::{error::Result, ffi};
-
-use super::ArrayRef;
-
-/// Exports an array to raw pointers of the C Data Interface provided by the
consumer.
-/// # Safety
-/// Assumes that these pointers represent valid C Data Interfaces, both in
memory
-/// representation and lifetime via the `release` mechanism.
-///
-/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray]
and
-/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw
pointers.
-/// Usually the raw pointers are provided by the array data consumer.
-#[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")]
-pub unsafe fn export_array_into_raw(
- src: ArrayRef,
- out_array: *mut ffi::FFI_ArrowArray,
- out_schema: *mut ffi::FFI_ArrowSchema,
-) -> Result<()> {
- let data = src.to_data();
- let array = ffi::FFI_ArrowArray::new(&data);
- let schema = ffi::FFI_ArrowSchema::try_from(data.data_type())?;
-
- std::ptr::write_unaligned(out_array, array);
- std::ptr::write_unaligned(out_schema, schema);
-
- Ok(())
-}
-
-#[cfg(test)]
-mod tests {
- use crate::array::{DictionaryArray, FixedSizeListArray, Int32Array,
StringArray};
- use crate::buffer::Buffer;
- use crate::error::Result;
- use crate::util::bit_util;
- use crate::{
- array::{
- Array, ArrayData, BooleanArray, FixedSizeBinaryArray, Int64Array,
StructArray,
- UInt32Array, UInt64Array,
- },
- datatypes::{DataType, Field},
- ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
- };
- use std::sync::Arc;
-
- fn test_round_trip(expected: &ArrayData) -> Result<()> {
- // here we export the array
- let array = FFI_ArrowArray::new(expected);
- let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
-
- // simulate an external consumer by being the consumer
- let result = &unsafe { from_ffi(array, &schema) }?;
-
- assert_eq!(result, expected);
- Ok(())
- }
-
- #[test]
- fn test_u32() -> Result<()> {
- let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_u64() -> Result<()> {
- let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_i64() -> Result<()> {
- let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_struct() -> Result<()> {
- let inner = StructArray::from(vec![
- (
- Arc::new(Field::new("a1", DataType::Boolean, false)),
- Arc::new(BooleanArray::from(vec![true, true, false, false]))
as Arc<dyn Array>,
- ),
- (
- Arc::new(Field::new("a2", DataType::UInt32, false)),
- Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
- ),
- ]);
-
- let array = StructArray::from(vec![
- (
- Arc::new(Field::new("a", inner.data_type().clone(), false)),
- Arc::new(inner) as Arc<dyn Array>,
- ),
- (
- Arc::new(Field::new("b", DataType::Boolean, false)),
- Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
- ),
- (
- Arc::new(Field::new("c", DataType::UInt32, false)),
- Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
- ),
- ]);
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_dictionary() -> Result<()> {
- let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
- let keys = Int32Array::from(vec![
- Some(0),
- Some(1),
- None,
- Some(1),
- Some(1),
- None,
- Some(1),
- Some(2),
- Some(1),
- None,
- ]);
- let array = DictionaryArray::new(keys, Arc::new(values));
-
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_fixed_size_binary() -> Result<()> {
- let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30,
30]];
- let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;
-
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_fixed_size_binary_with_nulls() -> Result<()> {
- let values = vec![
- None,
- Some(vec![10, 10, 10]),
- None,
- Some(vec![20, 20, 20]),
- Some(vec![30, 30, 30]),
- None,
- ];
- let array =
FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
-
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_fixed_size_list() -> Result<()> {
- let v: Vec<i64> = (0..9).collect();
- let value_data = ArrayData::builder(DataType::Int64)
- .len(9)
- .add_buffer(Buffer::from_slice_ref(v))
- .build()?;
- let list_data_type =
- DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64,
false)), 3);
- let list_data = ArrayData::builder(list_data_type)
- .len(3)
- .add_child_data(value_data)
- .build()?;
- let array = FixedSizeListArray::from(list_data);
-
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_fixed_size_list_with_nulls() -> Result<()> {
- // 0100 0110
- let mut validity_bits: [u8; 1] = [0; 1];
- bit_util::set_bit(&mut validity_bits, 1);
- bit_util::set_bit(&mut validity_bits, 2);
- bit_util::set_bit(&mut validity_bits, 6);
-
- let v: Vec<i16> = (0..16).collect();
- let value_data = ArrayData::builder(DataType::Int16)
- .len(16)
- .add_buffer(Buffer::from_slice_ref(v))
- .build()?;
- let list_data_type =
- DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16,
false)), 2);
- let list_data = ArrayData::builder(list_data_type)
- .len(8)
- .null_bit_buffer(Some(Buffer::from(validity_bits)))
- .add_child_data(value_data)
- .build()?;
- let array = FixedSizeListArray::from(list_data);
-
- let data = array.into_data();
- test_round_trip(&data)
- }
-
- #[test]
- fn test_fixed_size_list_nested() -> Result<()> {
- let v: Vec<i32> = (0..16).collect();
- let value_data = ArrayData::builder(DataType::Int32)
- .len(16)
- .add_buffer(Buffer::from_slice_ref(v))
- .build()?;
-
- let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
- let value_offsets = Buffer::from_slice_ref(offsets);
- let inner_list_data_type =
- DataType::List(Arc::new(Field::new("item", DataType::Int32,
false)));
- let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
- .len(8)
- .add_buffer(value_offsets)
- .add_child_data(value_data)
- .build()?;
-
- // 0000 0100
- let mut validity_bits: [u8; 1] = [0; 1];
- bit_util::set_bit(&mut validity_bits, 2);
-
- let list_data_type =
- DataType::FixedSizeList(Arc::new(Field::new("f",
inner_list_data_type, false)), 2);
- let list_data = ArrayData::builder(list_data_type)
- .len(4)
- .null_bit_buffer(Some(Buffer::from(validity_bits)))
- .add_child_data(inner_list_data)
- .build()?;
-
- let array = FixedSizeListArray::from(list_data);
-
- let data = array.into_data();
- test_round_trip(&data)
- }
-}
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index fa01f4c4c15..b563c320bb6 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -19,9 +19,6 @@
//!
//! **See [arrow_array] for examples and usage instructions**
-#[cfg(feature = "ffi")]
-mod ffi;
-
// --------------------- Array & ArrayData ---------------------
pub use arrow_array::builder::*;
pub use arrow_array::cast::*;
@@ -35,7 +32,7 @@ pub use arrow_data::transform::{Capacities, MutableArrayData};
#[cfg(feature = "ffi")]
#[allow(deprecated)]
-pub use self::ffi::export_array_into_raw;
+pub use arrow_array::ffi::export_array_into_raw;
// --------------------- Array's values comparison ---------------------
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 78e2363e482..09d6fc48aef 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -363,9 +363,9 @@ pub use arrow_csv as csv;
pub mod datatypes;
pub mod error;
#[cfg(feature = "ffi")]
-pub mod ffi;
+pub use arrow_array::ffi;
#[cfg(feature = "ffi")]
-pub mod ffi_stream;
+pub use arrow_array::ffi_stream;
#[cfg(feature = "ipc")]
pub use arrow_ipc as ipc;
#[cfg(feature = "json")]