This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b5ac048 ARROW-11627: [Rust] Make allocator be a generic over type T
b5ac048 is described below
commit b5ac048c75cc55f4039d279f554920be3112d7cd
Author: Jorge C. Leitao <[email protected]>
AuthorDate: Thu Feb 25 07:01:47 2021 +0200
ARROW-11627: [Rust] Make allocator be a generic over type T
The background and rationale for this are described
[here](https://github.com/jorgecarleitao/arrow2/tree/proposal); the idea is
that this is groundwork to make our buffers typed, so that we can start
introducing strong typing in the crate.
This change is backward incompatible:
1. Our allocator is now a generic over type `T: NativeType`, which implies
that we can now allocate certain types.
2. The allocator moved from `memory` to a new module `alloc` (inspired
after `std::alloc`).
Necessary steps to migrate existing code:
1. `use arrow::memory` -> `use arrow::alloc`
2. `memory::allocate_aligned(...)` -> `alloc::allocate_aligned::<u8>(...)`
Note how `NativeType` contains `to_le_bytes`; we will use this method for
IPC, where we need to serialize buffers with a specific endianness. This is
groundwork to enable support for multiple endiannesses
Closes #9495 from jorgecarleitao/alloc_t
Authored-by: Jorge C. Leitao <[email protected]>
Signed-off-by: Neville Dipale <[email protected]>
---
rust/arrow/src/alloc/alignment.rs | 119 ++++++++++++++++
rust/arrow/src/alloc/mod.rs | 136 ++++++++++++++++++
rust/arrow/src/alloc/types.rs | 175 +++++++++++++++++++++++
rust/arrow/src/array/array_list.rs | 6 +-
rust/arrow/src/array/raw_pointer.rs | 7 +-
rust/arrow/src/buffer/immutable.rs | 18 +--
rust/arrow/src/buffer/mutable.rs | 16 ++-
rust/arrow/src/bytes.rs | 4 +-
rust/arrow/src/lib.rs | 2 +-
rust/arrow/src/memory.rs | 277 ------------------------------------
10 files changed, 456 insertions(+), 304 deletions(-)
diff --git a/rust/arrow/src/alloc/alignment.rs
b/rust/arrow/src/alloc/alignment.rs
new file mode 100644
index 0000000..dbf4602
--- /dev/null
+++ b/rust/arrow/src/alloc/alignment.rs
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// NOTE: Below code is written for spatial/temporal prefetcher optimizations.
Memory allocation
+// should align well with usage pattern of cache access and block sizes on
layers of storage levels from
+// registers to non-volatile memory. These alignments are all cache aware
alignments incorporated
+// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach
mimicks Intel TBB's
+// cache_aligned_allocator which exploits cache locality and minimizes
prefetch signals
+// resulting in less round trip time between the layers of storage.
+// For further info: https://software.intel.com/en-us/node/506094
+
+// 32-bit architecture and things other than netburst microarchitecture are
using 64 bytes.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "x86")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// Intel x86_64:
+// L2D streamer from L1:
+// Loads data or instructions from memory to the second-level cache. To use
the streamer,
+// organize the data or instructions in blocks of 128 bytes, aligned on 128
bytes.
+// -
https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "x86_64")]
+pub const ALIGNMENT: usize = 1 << 7;
+
+// 24Kc:
+// Data Line Size
+// -
https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
+// -
https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "mips")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "mips64")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Defaults for powerpc
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "powerpc")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Defaults for the ppc 64
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "powerpc64")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// e.g.: sifive
+// -
https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
+// in general all of them are the same.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "riscv")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// This size is same across all hardware for this architecture.
+// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "s390x")]
+pub const ALIGNMENT: usize = 1 << 8;
+
+// This size is same across all hardware for this architecture.
+// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "sparc")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "sparc64")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// On ARM cache line sizes are fixed. both v6 and v7.
+// Need to add board specific or platform specific things later.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "thumbv6")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "thumbv7")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Operating Systems cache size determines this.
+// Currently no way to determine this without runtime inference.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "wasm32")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// Same as v6 and v7.
+// List goes like that:
+// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "arm")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Combined from 4 sectors. Volta says 128.
+// Prevent chunk optimizations better to go to the default size.
+// If you have smaller data with less padded functionality then use 32 with
force option.
+// -
https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "nvptx")]
+pub const ALIGNMENT: usize = 1 << 7;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "nvptx64")]
+pub const ALIGNMENT: usize = 1 << 7;
+
+// This size is same across all hardware for this architecture.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "aarch64")]
+pub const ALIGNMENT: usize = 1 << 6;
diff --git a/rust/arrow/src/alloc/mod.rs b/rust/arrow/src/alloc/mod.rs
new file mode 100644
index 0000000..a225d32
--- /dev/null
+++ b/rust/arrow/src/alloc/mod.rs
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines memory-related functions, such as allocate/deallocate/reallocate
memory
+//! regions, cache and allocation alignments.
+
+use std::mem::size_of;
+use std::ptr::NonNull;
+use std::{
+ alloc::{handle_alloc_error, Layout},
+ sync::atomic::AtomicIsize,
+};
+
+mod alignment;
+mod types;
+
+pub use alignment::ALIGNMENT;
+pub use types::NativeType;
+
+// If this number is not zero after all objects have been `drop`, there is a
memory leak
+pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
+
+#[inline]
+unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
+ NonNull::new_unchecked(ALIGNMENT as *mut T)
+}
+
+/// Allocates a cache-aligned memory region of `size` bytes with uninitialized
values.
+/// This is more performant than using [allocate_aligned_zeroed] when all
bytes will have
+/// an unknown or non-zero value and is semantically similar to `malloc`.
+pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
+ unsafe {
+ if size == 0 {
+ null_pointer()
+ } else {
+ let size = size * size_of::<T>();
+ ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
+
+ let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
+ let raw_ptr = std::alloc::alloc(layout) as *mut T;
+ NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+ }
+ }
+}
+
+/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of
them.
+/// This is more performant than using [allocate_aligned] and setting all
bytes to zero
+/// and is semantically similar to `calloc`.
+pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
+ unsafe {
+ if size == 0 {
+ null_pointer()
+ } else {
+ let size = size * size_of::<T>();
+ ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
+
+ let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
+ let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
+ NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+ }
+ }
+}
+
+/// # Safety
+///
+/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
+/// of the following:
+///
+/// * ptr must denote a block of memory currently allocated via this allocator,
+///
+/// * size must be the same size that was used to allocate that block of
memory,
+pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
+ if ptr != null_pointer() {
+ let size = size * size_of::<T>();
+ ALLOCATIONS.fetch_sub(size as isize,
std::sync::atomic::Ordering::SeqCst);
+ std::alloc::dealloc(
+ ptr.as_ptr() as *mut u8,
+ Layout::from_size_align_unchecked(size, ALIGNMENT),
+ );
+ }
+}
+
+/// # Safety
+///
+/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
+/// of the following:
+///
+/// * ptr must be currently allocated via this allocator,
+///
+/// * new_size must be greater than zero.
+///
+/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must
not overflow (i.e.,
+/// the rounded value must be less than usize::MAX).
+pub unsafe fn reallocate<T: NativeType>(
+ ptr: NonNull<T>,
+ old_size: usize,
+ new_size: usize,
+) -> NonNull<T> {
+ let old_size = old_size * size_of::<T>();
+ let new_size = new_size * size_of::<T>();
+ if ptr == null_pointer() {
+ return allocate_aligned(new_size);
+ }
+
+ if new_size == 0 {
+ free_aligned(ptr, old_size);
+ return null_pointer();
+ }
+
+ ALLOCATIONS.fetch_add(
+ new_size as isize - old_size as isize,
+ std::sync::atomic::Ordering::SeqCst,
+ );
+ let raw_ptr = std::alloc::realloc(
+ ptr.as_ptr() as *mut u8,
+ Layout::from_size_align_unchecked(old_size, ALIGNMENT),
+ new_size,
+ ) as *mut T;
+ NonNull::new(raw_ptr).unwrap_or_else(|| {
+ handle_alloc_error(Layout::from_size_align_unchecked(new_size,
ALIGNMENT))
+ })
+}
diff --git a/rust/arrow/src/alloc/types.rs b/rust/arrow/src/alloc/types.rs
new file mode 100644
index 0000000..0e177da
--- /dev/null
+++ b/rust/arrow/src/alloc/types.rs
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::datatypes::DataType;
+
+/// A type that Rust's custom allocator knows how to allocate and deallocate.
+/// This is implemented for all Arrow's physical types whose in-memory
representation
+/// matches Rust's physical types. Consider this trait sealed.
+/// # Safety
+/// Do not implement this trait.
+pub unsafe trait NativeType:
+ Sized + Copy + std::fmt::Debug + std::fmt::Display + PartialEq + Default +
Sized + 'static
+{
+ type Bytes: AsRef<[u8]>;
+
+ /// Whether a DataType is a valid type for this physical representation.
+ fn is_valid(data_type: &DataType) -> bool;
+
+ /// How this type represents itself as bytes in little endianess.
+ /// This is used for IPC, where data is communicated with a specific
endianess.
+ fn to_le_bytes(&self) -> Self::Bytes;
+}
+
+unsafe impl NativeType for u8 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt8
+ }
+}
+
+unsafe impl NativeType for u16 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt16
+ }
+}
+
+unsafe impl NativeType for u32 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt32
+ }
+}
+
+unsafe impl NativeType for u64 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt64
+ }
+}
+
+unsafe impl NativeType for i8 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Int8
+ }
+}
+
+unsafe impl NativeType for i16 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Int16
+ }
+}
+
+unsafe impl NativeType for i32 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ matches!(
+ data_type,
+ DataType::Int32 | DataType::Date32 | DataType::Time32(_)
+ )
+ }
+}
+
+unsafe impl NativeType for i64 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ matches!(
+ data_type,
+ DataType::Int64
+ | DataType::Date64
+ | DataType::Time64(_)
+ | DataType::Timestamp(_, _)
+ )
+ }
+}
+
+unsafe impl NativeType for f32 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Float32
+ }
+}
+
+unsafe impl NativeType for f64 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Float64
+ }
+}
diff --git a/rust/arrow/src/array/array_list.rs
b/rust/arrow/src/array/array_list.rs
index 8458836..f2076b3 100644
--- a/rust/arrow/src/array/array_list.rs
+++ b/rust/arrow/src/array/array_list.rs
@@ -378,12 +378,12 @@ impl fmt::Debug for FixedSizeListArray {
#[cfg(test)]
mod tests {
use crate::{
+ alloc,
array::ArrayData,
array::Int32Array,
buffer::Buffer,
datatypes::Field,
datatypes::{Int32Type, ToByteSlice},
- memory,
util::bit_util,
};
@@ -993,7 +993,7 @@ mod tests {
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
- let ptr = memory::allocate_aligned(8);
+ let ptr = alloc::allocate_aligned::<u8>(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
let array_data =
ArrayData::builder(DataType::Int32).add_buffer(buf2).build();
@@ -1003,7 +1003,7 @@ mod tests {
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_list_array_alignment() {
- let ptr = memory::allocate_aligned(8);
+ let ptr = alloc::allocate_aligned::<u8>(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
diff --git a/rust/arrow/src/array/raw_pointer.rs
b/rust/arrow/src/array/raw_pointer.rs
index 897dc5b..185e1cb 100644
--- a/rust/arrow/src/array/raw_pointer.rs
+++ b/rust/arrow/src/array/raw_pointer.rs
@@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-use crate::memory;
use std::ptr::NonNull;
/// This struct is highly `unsafe` and offers the possibility to
self-reference a [arrow::Buffer] from [arrow::array::ArrayData].
@@ -36,7 +35,11 @@ impl<T> RawPtrBox<T> {
/// * `ptr` is not aligned to a slice of type `T`. This is guaranteed if
it was built from a slice of type `T`.
pub(super) unsafe fn new(ptr: *const u8) -> Self {
let ptr = NonNull::new(ptr as *mut u8).expect("Pointer cannot be
null");
- assert!(memory::is_ptr_aligned::<T>(ptr), "memory is not aligned");
+ assert_eq!(
+ ptr.as_ptr().align_offset(std::mem::align_of::<T>()),
+ 0,
+ "memory is not aligned"
+ );
Self { ptr: ptr.cast() }
}
diff --git a/rust/arrow/src/buffer/immutable.rs
b/rust/arrow/src/buffer/immutable.rs
index e96bc00..c09e4dd 100644
--- a/rust/arrow/src/buffer/immutable.rs
+++ b/rust/arrow/src/buffer/immutable.rs
@@ -21,9 +21,7 @@ use std::ptr::NonNull;
use std::sync::Arc;
use std::{convert::AsRef, usize};
-use crate::memory;
use crate::util::bit_chunk_iterator::BitChunks;
-use crate::util::bit_util;
use crate::{
bytes::{Bytes, Deallocation},
datatypes::ArrowNativeType,
@@ -56,19 +54,11 @@ impl Buffer {
/// Initializes a [Buffer] from a slice of items.
pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: &T) ->
Self {
- // allocate aligned memory buffer
let slice = items.as_ref();
- let len = slice.len() * std::mem::size_of::<U>();
- let capacity = bit_util::round_upto_multiple_of_64(len);
- let buffer = memory::allocate_aligned(capacity);
- unsafe {
- memory::memcpy(
- buffer,
- NonNull::new_unchecked(slice.as_ptr() as *mut u8),
- len,
- );
- Buffer::build_with_arguments(buffer, len,
Deallocation::Native(capacity))
- }
+ let len = slice.len();
+ let mut buffer = MutableBuffer::with_capacity(len);
+ buffer.extend_from_slice(slice);
+ buffer.into()
}
/// Creates a buffer from an existing memory region (must already be
byte-aligned), this
diff --git a/rust/arrow/src/buffer/mutable.rs b/rust/arrow/src/buffer/mutable.rs
index 9f0238f..ddc0501 100644
--- a/rust/arrow/src/buffer/mutable.rs
+++ b/rust/arrow/src/buffer/mutable.rs
@@ -1,9 +1,9 @@
use std::ptr::NonNull;
use crate::{
+ alloc,
bytes::{Bytes, Deallocation},
datatypes::{ArrowNativeType, ToByteSlice},
- memory,
util::bit_util,
};
@@ -53,8 +53,14 @@ impl MutableBuffer {
/// Allocate a new [MutableBuffer] with initial capacity to be at least
`capacity`.
#[inline]
pub fn new(capacity: usize) -> Self {
+ Self::with_capacity(capacity)
+ }
+
+ /// Allocate a new [MutableBuffer] with initial capacity to be at least
`capacity`.
+ #[inline]
+ pub fn with_capacity(capacity: usize) -> Self {
let capacity = bit_util::round_upto_multiple_of_64(capacity);
- let ptr = memory::allocate_aligned(capacity);
+ let ptr = alloc::allocate_aligned(capacity);
Self {
data: ptr,
len: 0,
@@ -75,7 +81,7 @@ impl MutableBuffer {
/// ```
pub fn from_len_zeroed(len: usize) -> Self {
let new_capacity = bit_util::round_upto_multiple_of_64(len);
- let ptr = memory::allocate_aligned_zeroed(new_capacity);
+ let ptr = alloc::allocate_aligned_zeroed(new_capacity);
Self {
data: ptr,
len,
@@ -324,7 +330,7 @@ unsafe fn reallocate(
) -> (NonNull<u8>, usize) {
let new_capacity = bit_util::round_upto_multiple_of_64(new_capacity);
let new_capacity = std::cmp::max(new_capacity, old_capacity * 2);
- let ptr = memory::reallocate(ptr, old_capacity, new_capacity);
+ let ptr = alloc::reallocate(ptr, old_capacity, new_capacity);
(ptr, new_capacity)
}
@@ -460,7 +466,7 @@ impl std::ops::DerefMut for MutableBuffer {
impl Drop for MutableBuffer {
fn drop(&mut self) {
- unsafe { memory::free_aligned(self.data, self.capacity) };
+ unsafe { alloc::free_aligned(self.data, self.capacity) };
}
}
diff --git a/rust/arrow/src/bytes.rs b/rust/arrow/src/bytes.rs
index 3236549..38fa443 100644
--- a/rust/arrow/src/bytes.rs
+++ b/rust/arrow/src/bytes.rs
@@ -24,7 +24,7 @@ use std::ptr::NonNull;
use std::sync::Arc;
use std::{fmt::Debug, fmt::Formatter};
-use crate::{ffi, memory};
+use crate::{alloc, ffi};
/// Mode of deallocating memory regions
pub enum Deallocation {
@@ -126,7 +126,7 @@ impl Drop for Bytes {
fn drop(&mut self) {
match &self.deallocation {
Deallocation::Native(capacity) => {
- unsafe { memory::free_aligned(self.ptr, *capacity) };
+ unsafe { alloc::free_aligned::<u8>(self.ptr, *capacity) };
}
// foreign interface knows how to deallocate itself.
Deallocation::Foreign(_) => (),
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index c082d61..9c2ca27 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -135,6 +135,7 @@
// introduced to ignore lint errors when upgrading from 2020-04-22 to
2020-11-14
#![allow(clippy::float_equality_without_abs, clippy::type_complexity)]
+mod alloc;
mod arch;
pub mod array;
pub mod bitmap;
@@ -147,7 +148,6 @@ pub mod error;
pub mod ffi;
pub mod ipc;
pub mod json;
-pub mod memory;
pub mod record_batch;
pub mod temporal_conversions;
pub mod tensor;
diff --git a/rust/arrow/src/memory.rs b/rust/arrow/src/memory.rs
deleted file mode 100644
index 0ea8845..0000000
--- a/rust/arrow/src/memory.rs
+++ /dev/null
@@ -1,277 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines memory-related functions, such as allocate/deallocate/reallocate
memory
-//! regions, cache and allocation alignments.
-
-use std::mem::align_of;
-use std::ptr::NonNull;
-use std::{
- alloc::{handle_alloc_error, Layout},
- sync::atomic::AtomicIsize,
-};
-
-// NOTE: Below code is written for spatial/temporal prefetcher optimizations.
Memory allocation
-// should align well with usage pattern of cache access and block sizes on
layers of storage levels from
-// registers to non-volatile memory. These alignments are all cache aware
alignments incorporated
-// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach
mimicks Intel TBB's
-// cache_aligned_allocator which exploits cache locality and minimizes
prefetch signals
-// resulting in less round trip time between the layers of storage.
-// For further info: https://software.intel.com/en-us/node/506094
-
-// 32-bit architecture and things other than netburst microarchitecture are
using 64 bytes.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "x86")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// Intel x86_64:
-// L2D streamer from L1:
-// Loads data or instructions from memory to the second-level cache. To use
the streamer,
-// organize the data or instructions in blocks of 128 bytes, aligned on 128
bytes.
-// -
https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "x86_64")]
-pub const ALIGNMENT: usize = 1 << 7;
-
-// 24Kc:
-// Data Line Size
-// -
https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
-// -
https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "mips")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "mips64")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Defaults for powerpc
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "powerpc")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Defaults for the ppc 64
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "powerpc64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// e.g.: sifive
-// -
https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
-// in general all of them are the same.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "riscv")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// This size is same across all hardware for this architecture.
-// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "s390x")]
-pub const ALIGNMENT: usize = 1 << 8;
-
-// This size is same across all hardware for this architecture.
-// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "sparc")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "sparc64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// On ARM cache line sizes are fixed. both v6 and v7.
-// Need to add board specific or platform specific things later.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "thumbv6")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "thumbv7")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Operating Systems cache size determines this.
-// Currently no way to determine this without runtime inference.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "wasm32")]
-pub const ALIGNMENT: usize = FALLBACK_ALIGNMENT;
-
-// Same as v6 and v7.
-// List goes like that:
-// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "arm")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Combined from 4 sectors. Volta says 128.
-// Prevent chunk optimizations better to go to the default size.
-// If you have smaller data with less padded functionality then use 32 with
force option.
-// -
https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "nvptx")]
-pub const ALIGNMENT: usize = 1 << 7;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "nvptx64")]
-pub const ALIGNMENT: usize = 1 << 7;
-
-// This size is same across all hardware for this architecture.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "aarch64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-#[doc(hidden)]
-/// Fallback cache and allocation multiple alignment size
-const FALLBACK_ALIGNMENT: usize = 1 << 6;
-
-///
-/// As you can see this is global and lives as long as the program lives.
-/// Be careful to not write anything to this pointer in any scenario.
-/// If you use allocation methods shown here you won't have any problems.
-const BYPASS_PTR: NonNull<u8> = unsafe { NonNull::new_unchecked(ALIGNMENT as
*mut u8) };
-
-// If this number is not zero after all objects have been `drop`, there is a
memory leak
-pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
-
-/// Allocates a cache-aligned memory region of `size` bytes with uninitialized
values.
-/// This is more performant than using [allocate_aligned_zeroed] when all
bytes will have
-/// an unknown or non-zero value and is semantically similar to `malloc`.
-pub fn allocate_aligned(size: usize) -> NonNull<u8> {
- unsafe {
- if size == 0 {
- // In a perfect world, there is no need to request zero size
allocation.
- // Currently, passing zero sized layout to alloc is UB.
- // This will dodge allocator api for any type.
- BYPASS_PTR
- } else {
- ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
-
- let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
- let raw_ptr = std::alloc::alloc(layout);
- NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
- }
- }
-}
-
-/// Allocates a cache-aligned memory region of `size` bytes with `0u8` on all
of them.
-/// This is more performant than using [allocate_aligned] and setting all
bytes to zero
-/// and is semantically similar to `calloc`.
-pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
- unsafe {
- if size == 0 {
- // In a perfect world, there is no need to request zero size
allocation.
- // Currently, passing zero sized layout to alloc is UB.
- // This will dodge allocator api for any type.
- BYPASS_PTR
- } else {
- ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
-
- let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
- let raw_ptr = std::alloc::alloc_zeroed(layout);
- NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
- }
- }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
-/// of the following:
-///
-/// * ptr must denote a block of memory currently allocated via this allocator,
-///
-/// * size must be the same size that was used to allocate that block of
memory,
-pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
- if ptr != BYPASS_PTR {
- ALLOCATIONS.fetch_sub(size as isize,
std::sync::atomic::Ordering::SeqCst);
- std::alloc::dealloc(
- ptr.as_ptr(),
- Layout::from_size_align_unchecked(size, ALIGNMENT),
- );
- }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
-/// of the following:
-///
-/// * ptr must be currently allocated via this allocator,
-///
-/// * new_size must be greater than zero.
-///
-/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must
not overflow (i.e.,
-/// the rounded value must be less than usize::MAX).
-pub unsafe fn reallocate(
- ptr: NonNull<u8>,
- old_size: usize,
- new_size: usize,
-) -> NonNull<u8> {
- if ptr == BYPASS_PTR {
- return allocate_aligned(new_size);
- }
-
- if new_size == 0 {
- free_aligned(ptr, old_size);
- return BYPASS_PTR;
- }
-
- ALLOCATIONS.fetch_add(
- new_size as isize - old_size as isize,
- std::sync::atomic::Ordering::SeqCst,
- );
- let raw_ptr = std::alloc::realloc(
- ptr.as_ptr(),
- Layout::from_size_align_unchecked(old_size, ALIGNMENT),
- new_size,
- );
- NonNull::new(raw_ptr).unwrap_or_else(|| {
- handle_alloc_error(Layout::from_size_align_unchecked(new_size,
ALIGNMENT))
- })
-}
-
-/// # Safety
-///
-/// Behavior is undefined if any of the following conditions are violated:
-///
-/// * `src` must be valid for reads of `len * size_of::<u8>()` bytes.
-///
-/// * `dst` must be valid for writes of `len * size_of::<u8>()` bytes.
-///
-/// * Both `src` and `dst` must be properly aligned.
-///
-/// `memcpy` creates a bitwise copy of `T`, regardless of whether `T` is
[`Copy`]. If `T` is not
-/// [`Copy`], using both the values in the region beginning at `*src` and the
region beginning at
-/// `*dst` can [violate memory safety][read-ownership].
-pub unsafe fn memcpy(dst: NonNull<u8>, src: NonNull<u8>, count: usize) {
- if src != BYPASS_PTR {
- std::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_ptr(), count)
- }
-}
-
-pub fn is_ptr_aligned<T>(p: NonNull<u8>) -> bool {
- p.as_ptr().align_offset(align_of::<T>()) == 0
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_allocate() {
- for _ in 0..10 {
- let p = allocate_aligned(1024);
- // make sure this is 64-byte aligned
- assert_eq!(0, (p.as_ptr() as usize) % 64);
- unsafe { free_aligned(p, 1024) };
- }
- }
-}