This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new fb016566e Split out arrow-buffer crate (#2594) (#2693)
fb016566e is described below
commit fb016566ea4f46d461230e1586f7bb95c29d5934
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Sep 15 15:30:54 2022 +0100
Split out arrow-buffer crate (#2594) (#2693)
* Split out arrow-buffer crate (#2594)
* Fix doc
* Review feedback
* Review feedback
* Use 64-bit wide collect_bool
---
Cargo.toml | 3 +-
arrow-buffer/Cargo.toml | 47 ++++
{arrow => arrow-buffer}/src/alloc/alignment.rs | 0
{arrow => arrow-buffer}/src/alloc/mod.rs | 32 +--
{arrow => arrow-buffer}/src/buffer/immutable.rs | 4 +-
{arrow/src/util => arrow-buffer/src/buffer}/mod.rs | 29 +--
{arrow => arrow-buffer}/src/buffer/mutable.rs | 45 ++--
{arrow => arrow-buffer}/src/buffer/ops.rs | 21 +-
{arrow => arrow-buffer}/src/buffer/scalar.rs | 2 +-
{arrow => arrow-buffer}/src/bytes.rs | 2 +-
arrow/src/util/mod.rs => arrow-buffer/src/lib.rs | 26 +-
.../src/datatypes => arrow-buffer/src}/native.rs | 129 ----------
.../src/util/bit_chunk_iterator.rs | 4 +-
{arrow => arrow-buffer}/src/util/bit_util.rs | 0
{arrow => arrow-buffer}/src/util/mod.rs | 19 --
arrow/Cargo.toml | 2 +
arrow/benches/buffer_bit_ops.rs | 6 +-
arrow/src/alloc/types.rs | 73 ------
arrow/src/array/array_list.rs | 4 +-
arrow/src/bitmap.rs | 29 ++-
arrow/src/buffer/mod.rs | 72 ------
arrow/src/compute/kernels/boolean.rs | 73 +++---
arrow/src/datatypes/native.rs | 286 +--------------------
arrow/src/lib.rs | 5 +-
arrow/src/util/mod.rs | 4 +-
25 files changed, 183 insertions(+), 734 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 9bf55c0f2..d0233ccb3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,10 +18,11 @@
[workspace]
members = [
"arrow",
+ "arrow-buffer",
+ "arrow-flight",
"parquet",
"parquet_derive",
"parquet_derive_test",
- "arrow-flight",
"integration-testing",
"object_store",
]
diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
new file mode 100644
index 000000000..87019111e
--- /dev/null
+++ b/arrow-buffer/Cargo.toml
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "arrow-buffer"
+version = "22.0.0"
+description = "Buffer abstractions for Apache Arrow"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
+authors = ["Apache Arrow <[email protected]>"]
+license = "Apache-2.0"
+keywords = ["arrow"]
+include = [
+ "benches/*.rs",
+ "src/**/*.rs",
+ "Cargo.toml",
+]
+edition = "2021"
+rust-version = "1.62"
+
+[lib]
+name = "arrow_buffer"
+path = "src/lib.rs"
+bench = false
+
+[dependencies]
+num = { version = "0.4", default-features = false, features = ["std"] }
+half = { version = "2.0", default-features = false }
+
+[dev-dependencies]
+rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
+
+[build-dependencies]
diff --git a/arrow/src/alloc/alignment.rs b/arrow-buffer/src/alloc/alignment.rs
similarity index 100%
rename from arrow/src/alloc/alignment.rs
rename to arrow-buffer/src/alloc/alignment.rs
diff --git a/arrow/src/alloc/mod.rs b/arrow-buffer/src/alloc/mod.rs
similarity index 84%
rename from arrow/src/alloc/mod.rs
rename to arrow-buffer/src/alloc/mod.rs
index 526850685..6b09c4b31 100644
--- a/arrow/src/alloc/mod.rs
+++ b/arrow-buffer/src/alloc/mod.rs
@@ -20,34 +20,29 @@
use std::alloc::{handle_alloc_error, Layout};
use std::fmt::{Debug, Formatter};
-use std::mem::size_of;
use std::panic::RefUnwindSafe;
use std::ptr::NonNull;
use std::sync::Arc;
mod alignment;
-mod types;
pub use alignment::ALIGNMENT;
-pub use types::NativeType;
#[inline]
-unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
- NonNull::new_unchecked(ALIGNMENT as *mut T)
+unsafe fn null_pointer() -> NonNull<u8> {
+ NonNull::new_unchecked(ALIGNMENT as *mut u8)
}
/// Allocates a cache-aligned memory region of `size` bytes with uninitialized
values.
/// This is more performant than using [allocate_aligned_zeroed] when all
bytes will have
/// an unknown or non-zero value and is semantically similar to `malloc`.
-pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
+pub fn allocate_aligned(size: usize) -> NonNull<u8> {
unsafe {
if size == 0 {
null_pointer()
} else {
- let size = size * size_of::<T>();
-
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
- let raw_ptr = std::alloc::alloc(layout) as *mut T;
+ let raw_ptr = std::alloc::alloc(layout);
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
}
}
@@ -56,15 +51,13 @@ pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of
them.
/// This is more performant than using [allocate_aligned] and setting all
bytes to zero
/// and is semantically similar to `calloc`.
-pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
+pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
unsafe {
if size == 0 {
null_pointer()
} else {
- let size = size * size_of::<T>();
-
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
- let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
+ let raw_ptr = std::alloc::alloc_zeroed(layout);
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
}
}
@@ -78,9 +71,8 @@ pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
/// * ptr must denote a block of memory currently allocated via this allocator,
///
/// * size must be the same size that was used to allocate that block of
memory,
-pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
+pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
if ptr != null_pointer() {
- let size = size * size_of::<T>();
std::alloc::dealloc(
ptr.as_ptr() as *mut u8,
Layout::from_size_align_unchecked(size, ALIGNMENT),
@@ -99,13 +91,11 @@ pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
///
/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must
not overflow (i.e.,
/// the rounded value must be less than usize::MAX).
-pub unsafe fn reallocate<T: NativeType>(
- ptr: NonNull<T>,
+pub unsafe fn reallocate(
+ ptr: NonNull<u8>,
old_size: usize,
new_size: usize,
-) -> NonNull<T> {
- let old_size = old_size * size_of::<T>();
- let new_size = new_size * size_of::<T>();
+) -> NonNull<u8> {
if ptr == null_pointer() {
return allocate_aligned(new_size);
}
@@ -119,7 +109,7 @@ pub unsafe fn reallocate<T: NativeType>(
ptr.as_ptr() as *mut u8,
Layout::from_size_align_unchecked(old_size, ALIGNMENT),
new_size,
- ) as *mut T;
+ );
NonNull::new(raw_ptr).unwrap_or_else(|| {
handle_alloc_error(Layout::from_size_align_unchecked(new_size,
ALIGNMENT))
})
diff --git a/arrow/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs
similarity index 99%
rename from arrow/src/buffer/immutable.rs
rename to arrow-buffer/src/buffer/immutable.rs
index 6d4d624ef..c60d28afc 100644
--- a/arrow/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -23,7 +23,7 @@ use std::{convert::AsRef, usize};
use crate::alloc::{Allocation, Deallocation};
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
-use crate::{bytes::Bytes, datatypes::ArrowNativeType};
+use crate::{bytes::Bytes, native::ArrowNativeType};
use super::ops::bitwise_unary_op_helper;
use super::MutableBuffer;
@@ -271,7 +271,7 @@ impl Buffer {
/// Prefer this to `collect` whenever possible, as it is ~60% faster.
/// # Example
/// ```
- /// # use arrow::buffer::Buffer;
+ /// # use arrow_buffer::buffer::Buffer;
/// let v = vec![1u32];
/// let iter = v.iter().map(|x| x * 2);
/// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
diff --git a/arrow/src/util/mod.rs b/arrow-buffer/src/buffer/mod.rs
similarity index 62%
copy from arrow/src/util/mod.rs
copy to arrow-buffer/src/buffer/mod.rs
index 6f68398e7..b9201f774 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow-buffer/src/buffer/mod.rs
@@ -15,24 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-#[cfg(feature = "test_utils")]
-pub mod bench_util;
-pub mod bit_chunk_iterator;
-pub mod bit_iterator;
-pub(crate) mod bit_mask;
-pub mod bit_util;
-#[cfg(feature = "test_utils")]
-pub mod data_gen;
-pub mod display;
-#[cfg(feature = "prettyprint")]
-pub mod pretty;
-pub(crate) mod serialization;
-pub mod string_writer;
-#[cfg(any(test, feature = "test_utils"))]
-pub mod test_util;
+//! This module contains two main structs: [Buffer] and [MutableBuffer]. A
buffer represents
+//! a contiguous memory region that can be shared via `offsets`.
-mod trusted_len;
-pub(crate) use trusted_len::trusted_len_unzip;
+mod immutable;
+pub use immutable::*;
+mod mutable;
+pub use mutable::*;
+mod ops;
+mod scalar;
+pub use scalar::*;
-pub mod decimal;
-pub(crate) mod reader_parser;
+pub use ops::*;
diff --git a/arrow/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs
similarity index 95%
rename from arrow/src/buffer/mutable.rs
rename to arrow-buffer/src/buffer/mutable.rs
index d1e633993..80644b63d 100644
--- a/arrow/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -20,7 +20,7 @@ use crate::alloc::Deallocation;
use crate::{
alloc,
bytes::Bytes,
- datatypes::{ArrowNativeType, ToByteSlice},
+ native::{ArrowNativeType, ToByteSlice},
util::bit_util,
};
use std::ptr::NonNull;
@@ -31,12 +31,12 @@ use std::ptr::NonNull;
/// Use [MutableBuffer::push] to insert an item,
[MutableBuffer::extend_from_slice]
/// to insert many items, and `into` to convert it to [`Buffer`].
///
-/// For a safe, strongly typed API consider using [`crate::array::BufferBuilder`]
+/// For a safe, strongly typed API consider using `arrow::array::BufferBuilder`
///
/// # Example
///
/// ```
-/// # use arrow::buffer::{Buffer, MutableBuffer};
+/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let mut buffer = MutableBuffer::new(0);
/// buffer.push(256u32);
/// buffer.extend_from_slice(&[1u32]);
@@ -75,7 +75,7 @@ impl MutableBuffer {
/// all bytes are guaranteed to be `0u8`.
/// # Example
/// ```
- /// # use arrow::buffer::{Buffer, MutableBuffer};
+ /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let mut buffer = MutableBuffer::from_len_zeroed(127);
/// assert_eq!(buffer.len(), 127);
/// assert!(buffer.capacity() >= 127);
@@ -131,7 +131,7 @@ impl MutableBuffer {
/// `self.len + additional > capacity`.
/// # Example
/// ```
- /// # use arrow::buffer::{Buffer, MutableBuffer};
+ /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let mut buffer = MutableBuffer::new(0);
/// buffer.reserve(253); // allocates for the first time
/// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
@@ -171,7 +171,7 @@ impl MutableBuffer {
/// growing it (potentially reallocating it) and writing `value` in the
newly available bytes.
/// # Example
/// ```
- /// # use arrow::buffer::{Buffer, MutableBuffer};
+ /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// let mut buffer = MutableBuffer::new(0);
/// buffer.resize(253, 2); // allocates for the first time
/// assert_eq!(buffer.as_slice()[252], 2u8);
@@ -195,7 +195,7 @@ impl MutableBuffer {
///
/// # Example
/// ```
- /// # use arrow::buffer::{Buffer, MutableBuffer};
+ /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
/// // 2 cache lines
/// let mut buffer = MutableBuffer::new(128);
/// assert_eq!(buffer.capacity(), 128);
@@ -322,7 +322,7 @@ impl MutableBuffer {
/// Extends this buffer from a slice of items that can be represented in
bytes, increasing its capacity if needed.
/// # Example
/// ```
- /// # use arrow::buffer::MutableBuffer;
+ /// # use arrow_buffer::buffer::MutableBuffer;
/// let mut buffer = MutableBuffer::new(0);
/// buffer.extend_from_slice(&[2u32, 0]);
/// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
@@ -346,7 +346,7 @@ impl MutableBuffer {
/// Extends the buffer with a new item, increasing its capacity if needed.
/// # Example
/// ```
- /// # use arrow::buffer::MutableBuffer;
+ /// # use arrow_buffer::buffer::MutableBuffer;
/// let mut buffer = MutableBuffer::new(0);
/// buffer.push(256u32);
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
@@ -384,7 +384,7 @@ impl MutableBuffer {
/// # Safety
/// The caller must ensure that the buffer was properly initialized up to
`len`.
#[inline]
- pub(crate) unsafe fn set_len(&mut self, len: usize) {
+ pub unsafe fn set_len(&mut self, len: usize) {
assert!(len <= self.capacity());
self.len = len;
}
@@ -394,16 +394,16 @@ impl MutableBuffer {
/// This is similar to `from_trusted_len_iter_bool`, however, can be
significantly faster
/// as it eliminates the conditional `Iterator::next`
#[inline]
- pub(crate) fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
- let mut buffer = Self::new(bit_util::ceil(len, 8));
+ pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
+ let mut buffer = Self::new(bit_util::ceil(len, 64) * 8);
- let chunks = len / 8;
- let remainder = len % 8;
+ let chunks = len / 64;
+ let remainder = len % 64;
for chunk in 0..chunks {
let mut packed = 0;
- for bit_idx in 0..8 {
- let i = bit_idx + chunk * 8;
- packed |= (f(i) as u8) << bit_idx;
+ for bit_idx in 0..64 {
+ let i = bit_idx + chunk * 64;
+ packed |= (f(i) as u64) << bit_idx;
}
// SAFETY: Already allocated sufficient capacity
@@ -413,14 +413,15 @@ impl MutableBuffer {
if remainder != 0 {
let mut packed = 0;
for bit_idx in 0..remainder {
- let i = bit_idx + chunks * 8;
- packed |= (f(i) as u8) << bit_idx;
+ let i = bit_idx + chunks * 64;
+ packed |= (f(i) as u64) << bit_idx;
}
// SAFETY: Already allocated sufficient capacity
unsafe { buffer.push_unchecked(packed) }
}
+ buffer.truncate(bit_util::ceil(len, 8));
buffer
}
}
@@ -484,7 +485,7 @@ impl MutableBuffer {
/// Prefer this to `collect` whenever possible, as it is faster ~60%
faster.
/// # Example
/// ```
- /// # use arrow::buffer::MutableBuffer;
+ /// # use arrow_buffer::buffer::MutableBuffer;
/// let v = vec![1u32];
/// let iter = v.iter().map(|x| x * 2);
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
@@ -525,10 +526,10 @@ impl MutableBuffer {
}
/// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted
(upper) length.
- /// # use arrow::buffer::MutableBuffer;
+ /// # use arrow_buffer::buffer::MutableBuffer;
/// # Example
/// ```
- /// # use arrow::buffer::MutableBuffer;
+ /// # use arrow_buffer::buffer::MutableBuffer;
/// let v = vec![false, true, false];
/// let iter = v.iter().map(|x| *x || true);
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter)
};
diff --git a/arrow/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
similarity index 89%
rename from arrow/src/buffer/ops.rs
rename to arrow-buffer/src/buffer/ops.rs
index 7000f3976..c1295ad9a 100644
--- a/arrow/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -20,26 +20,19 @@ use crate::util::bit_util::ceil;
/// Apply a bitwise operation `op` to four inputs and return the result as a
Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are
specified in number of bits.
-#[allow(clippy::too_many_arguments)]
-pub(crate) fn bitwise_quaternary_op_helper<F>(
- first: &Buffer,
- first_offset_in_bits: usize,
- second: &Buffer,
- second_offset_in_bits: usize,
- third: &Buffer,
- third_offset_in_bits: usize,
- fourth: &Buffer,
- fourth_offset_in_bits: usize,
+pub fn bitwise_quaternary_op_helper<F>(
+ buffers: [&Buffer; 4],
+ offsets: [usize; 4],
len_in_bits: usize,
op: F,
) -> Buffer
where
F: Fn(u64, u64, u64, u64) -> u64,
{
- let first_chunks = first.bit_chunks(first_offset_in_bits, len_in_bits);
- let second_chunks = second.bit_chunks(second_offset_in_bits, len_in_bits);
- let third_chunks = third.bit_chunks(third_offset_in_bits, len_in_bits);
- let fourth_chunks = fourth.bit_chunks(fourth_offset_in_bits, len_in_bits);
+ let first_chunks = buffers[0].bit_chunks(offsets[0], len_in_bits);
+ let second_chunks = buffers[1].bit_chunks(offsets[1], len_in_bits);
+ let third_chunks = buffers[2].bit_chunks(offsets[2], len_in_bits);
+ let fourth_chunks = buffers[3].bit_chunks(offsets[3], len_in_bits);
let chunks = first_chunks
.iter()
diff --git a/arrow/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs
similarity index 99%
rename from arrow/src/buffer/scalar.rs
rename to arrow-buffer/src/buffer/scalar.rs
index 7d663cd2b..a9f2df3d9 100644
--- a/arrow/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::buffer::Buffer;
-use crate::datatypes::ArrowNativeType;
+use crate::native::ArrowNativeType;
use std::ops::Deref;
/// Provides a safe API for interpreting a [`Buffer`] as a slice of
[`ArrowNativeType`]
diff --git a/arrow/src/bytes.rs b/arrow-buffer/src/bytes.rs
similarity index 98%
rename from arrow/src/bytes.rs
rename to arrow-buffer/src/bytes.rs
index 75137a552..20bf5a474 100644
--- a/arrow/src/bytes.rs
+++ b/arrow-buffer/src/bytes.rs
@@ -111,7 +111,7 @@ impl Drop for Bytes {
fn drop(&mut self) {
match &self.deallocation {
Deallocation::Arrow(capacity) => {
- unsafe { alloc::free_aligned::<u8>(self.ptr, *capacity) };
+ unsafe { alloc::free_aligned(self.ptr, *capacity) };
}
// The automatic drop implementation will free the memory once the
reference count reaches zero
Deallocation::Custom(_allocation) => (),
diff --git a/arrow/src/util/mod.rs b/arrow-buffer/src/lib.rs
similarity index 62%
copy from arrow/src/util/mod.rs
copy to arrow-buffer/src/lib.rs
index 6f68398e7..a8aca7c3d 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow-buffer/src/lib.rs
@@ -15,24 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-#[cfg(feature = "test_utils")]
-pub mod bench_util;
-pub mod bit_chunk_iterator;
-pub mod bit_iterator;
-pub(crate) mod bit_mask;
-pub mod bit_util;
-#[cfg(feature = "test_utils")]
-pub mod data_gen;
-pub mod display;
-#[cfg(feature = "prettyprint")]
-pub mod pretty;
-pub(crate) mod serialization;
-pub mod string_writer;
-#[cfg(any(test, feature = "test_utils"))]
-pub mod test_util;
+//! Buffer abstractions for [Apache Arrow](https://docs.rs/arrow)
-mod trusted_len;
-pub(crate) use trusted_len::trusted_len_unzip;
-
-pub mod decimal;
-pub(crate) mod reader_parser;
+pub mod alloc;
+pub mod buffer;
+mod bytes;
+pub mod native;
+pub mod util;
diff --git a/arrow/src/datatypes/native.rs b/arrow-buffer/src/native.rs
similarity index 66%
copy from arrow/src/datatypes/native.rs
copy to arrow-buffer/src/native.rs
index 444f2b27d..d8431953c 100644
--- a/arrow/src/datatypes/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-use super::DataType;
use half::f16;
mod private {
@@ -92,134 +91,6 @@ pub trait ArrowNativeType:
}
}
-/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
-/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
-pub trait ArrowPrimitiveType: 'static {
- /// Corresponding Rust native type for the primitive type.
- type Native: ArrowNativeType;
-
- /// the corresponding Arrow data type of this primitive type.
- const DATA_TYPE: DataType;
-
- /// Returns the byte width of this primitive type.
- fn get_byte_width() -> usize {
- std::mem::size_of::<Self::Native>()
- }
-
- /// Returns a default value of this primitive type.
- ///
- /// This is useful for aggregate array ops like `sum()`, `mean()`.
- fn default_value() -> Self::Native {
- Default::default()
- }
-}
-
-pub(crate) mod native_op {
- use super::ArrowNativeType;
- use std::ops::{Add, Div, Mul, Sub};
-
- /// Trait for ArrowNativeType to provide overflow-checking and
non-overflow-checking
- /// variants for arithmetic operations. For floating point types, this
provides some
- /// default implementations. Integer types that need to deal with overflow
can implement
- /// this trait.
- ///
- /// The APIs with `_wrapping` suffix are the variant of
non-overflow-checking. If overflow
- /// occurred, they will supposedly wrap around the boundary of the type.
- ///
- /// The APIs with `_checked` suffix are the variant of overflow-checking
which return `None`
- /// if overflow occurred.
- pub trait ArrowNativeTypeOp:
- ArrowNativeType
- + Add<Output = Self>
- + Sub<Output = Self>
- + Mul<Output = Self>
- + Div<Output = Self>
- {
- fn add_checked(self, rhs: Self) -> Option<Self> {
- Some(self + rhs)
- }
-
- fn add_wrapping(self, rhs: Self) -> Self {
- self + rhs
- }
-
- fn sub_checked(self, rhs: Self) -> Option<Self> {
- Some(self - rhs)
- }
-
- fn sub_wrapping(self, rhs: Self) -> Self {
- self - rhs
- }
-
- fn mul_checked(self, rhs: Self) -> Option<Self> {
- Some(self * rhs)
- }
-
- fn mul_wrapping(self, rhs: Self) -> Self {
- self * rhs
- }
-
- fn div_checked(self, rhs: Self) -> Option<Self> {
- Some(self / rhs)
- }
-
- fn div_wrapping(self, rhs: Self) -> Self {
- self / rhs
- }
- }
-}
-
-macro_rules! native_type_op {
- ($t:tt) => {
- impl native_op::ArrowNativeTypeOp for $t {
- fn add_checked(self, rhs: Self) -> Option<Self> {
- self.checked_add(rhs)
- }
-
- fn add_wrapping(self, rhs: Self) -> Self {
- self.wrapping_add(rhs)
- }
-
- fn sub_checked(self, rhs: Self) -> Option<Self> {
- self.checked_sub(rhs)
- }
-
- fn sub_wrapping(self, rhs: Self) -> Self {
- self.wrapping_sub(rhs)
- }
-
- fn mul_checked(self, rhs: Self) -> Option<Self> {
- self.checked_mul(rhs)
- }
-
- fn mul_wrapping(self, rhs: Self) -> Self {
- self.wrapping_mul(rhs)
- }
-
- fn div_checked(self, rhs: Self) -> Option<Self> {
- self.checked_div(rhs)
- }
-
- fn div_wrapping(self, rhs: Self) -> Self {
- self.wrapping_div(rhs)
- }
- }
- };
-}
-
-native_type_op!(i8);
-native_type_op!(i16);
-native_type_op!(i32);
-native_type_op!(i64);
-native_type_op!(u8);
-native_type_op!(u16);
-native_type_op!(u32);
-native_type_op!(u64);
-
-impl native_op::ArrowNativeTypeOp for f16 {}
-impl native_op::ArrowNativeTypeOp for f32 {}
-impl native_op::ArrowNativeTypeOp for f64 {}
-
impl private::Sealed for i8 {}
impl ArrowNativeType for i8 {
#[inline]
diff --git a/arrow/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs
similarity index 99%
rename from arrow/src/util/bit_chunk_iterator.rs
rename to arrow-buffer/src/util/bit_chunk_iterator.rs
index f0127ed22..ba028204d 100644
--- a/arrow/src/util/bit_chunk_iterator.rs
+++ b/arrow-buffer/src/util/bit_chunk_iterator.rs
@@ -153,7 +153,7 @@ impl<'a> UnalignedBitChunk<'a> {
self.chunks
}
- pub(crate) fn iter(&self) -> UnalignedBitChunkIterator<'a> {
+ pub fn iter(&self) -> UnalignedBitChunkIterator<'a> {
self.prefix
.into_iter()
.chain(self.chunks.iter().cloned())
@@ -166,7 +166,7 @@ impl<'a> UnalignedBitChunk<'a> {
}
}
-pub(crate) type UnalignedBitChunkIterator<'a> = std::iter::Chain<
+pub type UnalignedBitChunkIterator<'a> = std::iter::Chain<
std::iter::Chain<
std::option::IntoIter<u64>,
std::iter::Cloned<std::slice::Iter<'a, u64>>,
diff --git a/arrow/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs
similarity index 100%
rename from arrow/src/util/bit_util.rs
rename to arrow-buffer/src/util/bit_util.rs
diff --git a/arrow/src/util/mod.rs b/arrow-buffer/src/util/mod.rs
similarity index 65%
copy from arrow/src/util/mod.rs
copy to arrow-buffer/src/util/mod.rs
index 6f68398e7..c1cb284dc 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow-buffer/src/util/mod.rs
@@ -15,24 +15,5 @@
// specific language governing permissions and limitations
// under the License.
-#[cfg(feature = "test_utils")]
-pub mod bench_util;
pub mod bit_chunk_iterator;
-pub mod bit_iterator;
-pub(crate) mod bit_mask;
pub mod bit_util;
-#[cfg(feature = "test_utils")]
-pub mod data_gen;
-pub mod display;
-#[cfg(feature = "prettyprint")]
-pub mod pretty;
-pub(crate) mod serialization;
-pub mod string_writer;
-#[cfg(any(test, feature = "test_utils"))]
-pub mod test_util;
-
-mod trusted_len;
-pub(crate) use trusted_len::trusted_len_unzip;
-
-pub mod decimal;
-pub(crate) mod reader_parser;
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 2de4db642..c66cef612 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -44,6 +44,8 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
[dependencies]
+arrow-buffer = { path = "../arrow-buffer", version = "22.0.0" }
+
serde = { version = "1.0", default-features = false, features = ["derive"],
optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"],
optional = true }
indexmap = { version = "1.9", default-features = false, features = ["std"] }
diff --git a/arrow/benches/buffer_bit_ops.rs b/arrow/benches/buffer_bit_ops.rs
index 6c6bb0463..68b22df4b 100644
--- a/arrow/benches/buffer_bit_ops.rs
+++ b/arrow/benches/buffer_bit_ops.rs
@@ -38,15 +38,15 @@ fn create_buffer(size: usize) -> Buffer {
}
fn bench_buffer_and(left: &Buffer, right: &Buffer) {
- criterion::black_box((left & right).unwrap());
+ criterion::black_box(buffer_bin_and(left, 0, right, 0, left.len() * 8));
}
fn bench_buffer_or(left: &Buffer, right: &Buffer) {
- criterion::black_box((left | right).unwrap());
+ criterion::black_box(buffer_bin_or(left, 0, right, 0, left.len() * 8));
}
fn bench_buffer_not(buffer: &Buffer) {
- criterion::black_box(!buffer);
+ criterion::black_box(buffer_unary_not(buffer, 0, buffer.len() * 8));
}
fn bench_buffer_and_with_offsets(
diff --git a/arrow/src/alloc/types.rs b/arrow/src/alloc/types.rs
deleted file mode 100644
index 026e1241f..000000000
--- a/arrow/src/alloc/types.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use crate::datatypes::DataType;
-use half::f16;
-
-/// A type that Rust's custom allocator knows how to allocate and deallocate.
-/// This is implemented for all Arrow's physical types whose in-memory
representation
-/// matches Rust's physical types. Consider this trait sealed.
-/// # Safety
-/// Do not implement this trait.
-pub unsafe trait NativeType:
- Sized + Copy + std::fmt::Debug + std::fmt::Display + PartialEq + Default + Sized + 'static
-{
- type Bytes: AsRef<[u8]>;
-
- /// Whether a DataType is a valid type for this physical representation.
- fn is_valid(data_type: &DataType) -> bool;
-
- /// How this type represents itself as bytes in little endianess.
- /// This is used for IPC, where data is communicated with a specific
endianess.
- fn to_le_bytes(&self) -> Self::Bytes;
-}
-
-macro_rules! create_native {
- ($native_ty:ty,$($impl_pattern:pat_param)|+) => {
- unsafe impl NativeType for $native_ty {
- type Bytes = [u8; std::mem::size_of::<Self>()];
-
- #[inline]
- fn to_le_bytes(&self) -> Self::Bytes {
- Self::to_le_bytes(*self)
- }
-
- #[inline]
- fn is_valid(data_type: &DataType) -> bool {
- matches!(data_type, $($impl_pattern)|+)
- }
- }
- };
-}
-
-create_native!(u8, DataType::UInt8);
-create_native!(u16, DataType::UInt16);
-create_native!(u32, DataType::UInt32);
-create_native!(u64, DataType::UInt64);
-create_native!(i8, DataType::Int8);
-create_native!(i16, DataType::Int16);
-create_native!(
- i32,
- DataType::Int32 | DataType::Date32 | DataType::Time32(_)
-);
-create_native!(
- i64,
- DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _)
-);
-create_native!(f16, DataType::Float16);
-create_native!(f32, DataType::Float32);
-create_native!(f64, DataType::Float64);
diff --git a/arrow/src/array/array_list.rs b/arrow/src/array/array_list.rs
index b9c05014c..e830acdc2 100644
--- a/arrow/src/array/array_list.rs
+++ b/arrow/src/array/array_list.rs
@@ -844,7 +844,7 @@ mod tests {
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
- let ptr = alloc::allocate_aligned::<u8>(8);
+ let ptr = alloc::allocate_aligned(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
let array_data = ArrayData::builder(DataType::Int32)
@@ -860,7 +860,7 @@ mod tests {
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_alignment() {
- let ptr = alloc::allocate_aligned::<u8>(8);
+ let ptr = alloc::allocate_aligned(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
diff --git a/arrow/src/bitmap.rs b/arrow/src/bitmap.rs
index 4ba1bb9f8..4491da463 100644
--- a/arrow/src/bitmap.rs
+++ b/arrow/src/bitmap.rs
@@ -18,10 +18,11 @@
//! Defines [Bitmap] for tracking validity bitmaps
use crate::buffer::Buffer;
-use crate::error::Result;
+use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use std::mem;
+use arrow_buffer::buffer::{buffer_bin_and, buffer_bin_or};
use std::ops::{BitAnd, BitOr};
#[derive(Debug, Clone)]
@@ -79,7 +80,18 @@ impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {
type Output = Result<Bitmap>;
fn bitand(self, rhs: &'b Bitmap) -> Result<Bitmap> {
- Ok(Bitmap::from((&self.bits & &rhs.bits)?))
+ if self.bits.len() != rhs.bits.len() {
+ return Err(ArrowError::ComputeError(
+ "Buffers must be the same size to apply Bitwise AND.".to_string(),
+ ));
+ }
+ Ok(Bitmap::from(buffer_bin_and(
+ &self.bits,
+ 0,
+ &rhs.bits,
+ 0,
+ self.bit_len(),
+ )))
}
}
@@ -87,7 +99,18 @@ impl<'a, 'b> BitOr<&'b Bitmap> for &'a Bitmap {
type Output = Result<Bitmap>;
fn bitor(self, rhs: &'b Bitmap) -> Result<Bitmap> {
- Ok(Bitmap::from((&self.bits | &rhs.bits)?))
+ if self.bits.len() != rhs.bits.len() {
+ return Err(ArrowError::ComputeError(
+ "Buffers must be the same size to apply Bitwise OR.".to_string(),
+ ));
+ }
+ Ok(Bitmap::from(buffer_bin_or(
+ &self.bits,
+ 0,
+ &rhs.bits,
+ 0,
+ self.bit_len(),
+ )))
}
}
diff --git a/arrow/src/buffer/mod.rs b/arrow/src/buffer/mod.rs
deleted file mode 100644
index b392b0583..000000000
--- a/arrow/src/buffer/mod.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents
-//! a contiguous memory region that can be shared via `offsets`.
-
-mod immutable;
-pub use immutable::*;
-mod mutable;
-pub use mutable::*;
-mod ops;
-mod scalar;
-pub use scalar::*;
-
-pub use ops::*;
-
-use crate::error::{ArrowError, Result};
-use std::ops::{BitAnd, BitOr, Not};
-
-impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
- type Output = Result<Buffer>;
-
- fn bitand(self, rhs: &'b Buffer) -> Result<Buffer> {
- if self.len() != rhs.len() {
- return Err(ArrowError::ComputeError(
- "Buffers must be the same size to apply Bitwise
AND.".to_string(),
- ));
- }
-
- let len_in_bits = self.len() * 8;
- Ok(buffer_bin_and(self, 0, rhs, 0, len_in_bits))
- }
-}
-
-impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
- type Output = Result<Buffer>;
-
- fn bitor(self, rhs: &'b Buffer) -> Result<Buffer> {
- if self.len() != rhs.len() {
- return Err(ArrowError::ComputeError(
- "Buffers must be the same size to apply Bitwise
OR.".to_string(),
- ));
- }
-
- let len_in_bits = self.len() * 8;
-
- Ok(buffer_bin_or(self, 0, rhs, 0, len_in_bits))
- }
-}
-
-impl Not for &Buffer {
- type Output = Buffer;
-
- fn not(self) -> Buffer {
- let len_in_bits = self.len() * 8;
- buffer_unary_not(self, 0, len_in_bits)
- }
-}
diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow/src/compute/kernels/boolean.rs
index c51953a75..b8719ad2d 100644
--- a/arrow/src/compute/kernels/boolean.rs
+++ b/arrow/src/compute/kernels/boolean.rs
@@ -22,8 +22,6 @@
//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
-use std::ops::Not;
-
use crate::array::{Array, ArrayData, BooleanArray, PrimitiveArray};
use crate::buffer::{
bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and,
buffer_bin_or,
@@ -85,14 +83,13 @@ pub(crate) fn build_null_buffer_for_and_kleene(
// The final null bits are:
// (a | (c & !d)) & (c | (a & !b))
Some(bitwise_quaternary_op_helper(
- left_null_buffer,
- left_offset,
- left_buffer,
- left_offset,
- right_null_buffer,
- right_offset,
- right_buffer,
- right_offset,
+ [
+ left_null_buffer,
+ left_buffer,
+ right_null_buffer,
+ right_buffer,
+ ],
+ [left_offset, left_offset, right_offset, right_offset],
len_in_bits,
|a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
))
@@ -163,14 +160,13 @@ pub(crate) fn build_null_buffer_for_or_kleene(
// The final null bits are:
// (a | (c & d)) & (c | (a & b))
Some(bitwise_quaternary_op_helper(
- left_null_buffer,
- left_offset,
- left_buffer,
- left_offset,
- right_null_buffer,
- right_offset,
- right_buffer,
- right_offset,
+ [
+ left_null_buffer,
+ left_buffer,
+ right_null_buffer,
+ right_buffer,
+ ],
+ [left_offset, left_offset, right_offset, right_offset],
len_in_bits,
|a, b, c, d| (a | (c & d)) & (c | (a & b)),
))
@@ -493,7 +489,6 @@ where
));
}
let left_data = left.data();
- let right_data = right.data();
// If left has no bitmap, create a new one with all values set for nullity op later
// left=0 (null) right=null output bitmap=null
@@ -507,33 +502,31 @@ where
//
// Do the right expression !(right_values & right_bitmap) first since there are two steps
// TRICK: convert BooleanArray buffer as a bitmap for faster operation
- let right_combo_buffer = match right.data().null_bitmap() {
+ let rcb = match right.data().null_bitmap() {
Some(right_bitmap) => {
- // NOTE: right values and bitmaps are combined and stay at bit offset right.offset()
- (right.values() & &right_bitmap.bits).ok().map(|b| b.not())
+ let and = buffer_bin_and(
+ right.values(),
+ right.offset(),
+ &right_bitmap.bits,
+ right.offset(),
+ right.len(),
+ );
+ buffer_unary_not(&and, 0, right.len())
}
- None => Some(!right.values()),
+ None => buffer_unary_not(right.values(), right.offset(), right.len()),
};
// AND of original left null bitmap with right expression
// Here we take care of the possible offsets of the left and right arrays all at once.
let modified_null_buffer = match left_data.null_bitmap() {
- Some(left_null_bitmap) => match right_combo_buffer {
- Some(rcb) => Some(buffer_bin_and(
- &left_null_bitmap.bits,
- left_data.offset(),
- &rcb,
- right_data.offset(),
- left_data.len(),
- )),
- None => Some(
- left_null_bitmap
- .bits
- .bit_slice(left_data.offset(), left.len()),
- ),
- },
- None => right_combo_buffer
- .map(|rcb| rcb.bit_slice(right_data.offset(), right_data.len())),
+ Some(left_null_bitmap) => buffer_bin_and(
+ &left_null_bitmap.bits,
+ left_data.offset(),
+ &rcb,
+ 0,
+ left_data.len(),
+ ),
+ None => rcb,
};
// Align/shift left data on offset as needed, since new bitmaps are shifted and aligned to 0 already
@@ -556,7 +549,7 @@ where
T::DATA_TYPE,
left.len(),
None, // force new to compute the number of null bits
- modified_null_buffer,
+ Some(modified_null_buffer),
0, // No need for offset since left data has been shifted
data_buffers,
left_data.child_data().to_vec(),
diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs
index 444f2b27d..8c329a066 100644
--- a/arrow/src/datatypes/native.rs
+++ b/arrow/src/datatypes/native.rs
@@ -16,82 +16,9 @@
// under the License.
use super::DataType;
+pub use arrow_buffer::native::{ArrowNativeType, ToByteSlice};
use half::f16;
-mod private {
- pub trait Sealed {}
-}
-
-/// Trait expressing a Rust type that has the same in-memory representation
-/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits).
-///
-/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers
-/// as is.
-///
-/// # Transmute Safety
-///
-/// A type T implementing this trait means that any arbitrary slice of bytes of length and
-/// alignment `size_of::<T>()` can be safely interpreted as a value of that type without
-/// being unsound, i.e. potentially resulting in undefined behaviour.
-///
-/// Note: in the case of floating point numbers this transmutation can result in a signalling
-/// NaN, which, whilst sound, can be unwieldy. In general, whilst it is perfectly sound to
-/// reinterpret bytes as different types using this trait, it is likely unwise. For more information
-/// see [f32::from_bits] and [f64::from_bits].
-///
-/// Note: `bool` is restricted to `0` or `1`, and so `bool: !ArrowNativeType`
-///
-/// # Sealed
-///
-/// Due to the above restrictions, this trait is sealed to prevent accidental misuse
-pub trait ArrowNativeType:
- std::fmt::Debug
- + Send
- + Sync
- + Copy
- + PartialOrd
- + std::str::FromStr
- + Default
- + private::Sealed
- + 'static
-{
- /// Convert native type from usize.
- #[inline]
- fn from_usize(_: usize) -> Option<Self> {
- None
- }
-
- /// Convert native type to usize.
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- None
- }
-
- /// Convert native type to isize.
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- None
- }
-
- /// Convert native type from i32.
- #[inline]
- fn from_i32(_: i32) -> Option<Self> {
- None
- }
-
- /// Convert native type from i64.
- #[inline]
- fn from_i64(_: i64) -> Option<Self> {
- None
- }
-
- /// Convert native type from i128.
- #[inline]
- fn from_i128(_: i128) -> Option<Self> {
- None
- }
-}
-
/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
pub trait ArrowPrimitiveType: 'static {
@@ -219,214 +146,3 @@ native_type_op!(u64);
impl native_op::ArrowNativeTypeOp for f16 {}
impl native_op::ArrowNativeTypeOp for f32 {}
impl native_op::ArrowNativeTypeOp for f64 {}
-
-impl private::Sealed for i8 {}
-impl ArrowNativeType for i8 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-}
-
-impl private::Sealed for i16 {}
-impl ArrowNativeType for i16 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-}
-
-impl private::Sealed for i32 {}
-impl ArrowNativeType for i32 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-
- /// Convert native type from i32.
- #[inline]
- fn from_i32(val: i32) -> Option<Self> {
- Some(val)
- }
-}
-
-impl private::Sealed for i64 {}
-impl ArrowNativeType for i64 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-
- /// Convert native type from i64.
- #[inline]
- fn from_i64(val: i64) -> Option<Self> {
- Some(val)
- }
-}
-
-impl private::Sealed for i128 {}
-impl ArrowNativeType for i128 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-
- /// Convert native type from i128.
- #[inline]
- fn from_i128(val: i128) -> Option<Self> {
- Some(val)
- }
-}
-
-impl private::Sealed for u8 {}
-impl ArrowNativeType for u8 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-}
-
-impl private::Sealed for u16 {}
-impl ArrowNativeType for u16 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-}
-
-impl private::Sealed for u32 {}
-impl ArrowNativeType for u32 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-}
-
-impl private::Sealed for u64 {}
-impl ArrowNativeType for u64 {
- #[inline]
- fn from_usize(v: usize) -> Option<Self> {
- num::FromPrimitive::from_usize(v)
- }
-
- #[inline]
- fn to_usize(&self) -> Option<usize> {
- num::ToPrimitive::to_usize(self)
- }
-
- #[inline]
- fn to_isize(&self) -> Option<isize> {
- num::ToPrimitive::to_isize(self)
- }
-}
-
-impl ArrowNativeType for f16 {}
-impl private::Sealed for f16 {}
-impl ArrowNativeType for f32 {}
-impl private::Sealed for f32 {}
-impl ArrowNativeType for f64 {}
-impl private::Sealed for f64 {}
-
-/// Allows conversion from supported Arrow types to a byte slice.
-pub trait ToByteSlice {
- /// Converts this instance into a byte slice
- fn to_byte_slice(&self) -> &[u8];
-}
-
-impl<T: ArrowNativeType> ToByteSlice for [T] {
- #[inline]
- fn to_byte_slice(&self) -> &[u8] {
- let raw_ptr = self.as_ptr() as *const T as *const u8;
- unsafe {
- std::slice::from_raw_parts(raw_ptr, self.len() *
std::mem::size_of::<T>())
- }
- }
-}
-
-impl<T: ArrowNativeType> ToByteSlice for T {
- #[inline]
- fn to_byte_slice(&self) -> &[u8] {
- let raw_ptr = self as *const T as *const u8;
- unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of::<T>())
}
- }
-}
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index 87a4799e3..a4d864754 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -248,11 +248,10 @@
#![deny(clippy::redundant_clone)]
#![warn(missing_debug_implementations)]
-pub mod alloc;
+pub use arrow_buffer::{alloc, buffer};
+
pub mod array;
pub mod bitmap;
-pub mod buffer;
-mod bytes;
pub mod compute;
#[cfg(feature = "csv")]
pub mod csv;
diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs
index 6f68398e7..5453c11ab 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow/src/util/mod.rs
@@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+pub use arrow_buffer::util::{bit_chunk_iterator, bit_util};
+
#[cfg(feature = "test_utils")]
pub mod bench_util;
-pub mod bit_chunk_iterator;
pub mod bit_iterator;
pub(crate) mod bit_mask;
-pub mod bit_util;
#[cfg(feature = "test_utils")]
pub mod data_gen;
pub mod display;