This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b5ac048 ARROW-11627: [Rust] Make allocator be a generic over type T
b5ac048 is described below
commit b5ac048c75cc55f4039d279f554920be3112d7cd
Author: Jorge C. Leitao <[email protected]>
AuthorDate: Thu Feb 25 07:01:47 2021 +0200
ARROW-11627: [Rust] Make allocator be a generic over type T
The background and rationale for this are described
[here](https://github.com/jorgecarleitao/arrow2/tree/proposal); the idea is
that this is groundwork to make our buffers typed, so that we can start
introducing strong typing in the crate.
This change is backward incompatible:
1. Our allocator is now a generic over type `T: NativeType`, which implies
that we can now allocate certain types.
2. The allocator moved from `memory` to a new module `alloc` (inspired
after `std::alloc`).
Necessary steps to migrate existing code:
1. `use arrow::memory` -> `use arrow::alloc`
2. `memory::allocate_aligned(...)` -> `alloc::allocate_aligned::<u8>(...)`
Note how `NativeType` contains `to_le_bytes`; we will use this method for
IPC, where we need to serialize buffers with a specific endianness. This is
groundwork to enable support for multiple endiannesses
Closes #9495 from jorgecarleitao/alloc_t
Authored-by: Jorge C. Leitao <[email protected]>
Signed-off-by: Neville Dipale <[email protected]>
---
rust/arrow/src/alloc/alignment.rs | 119 ++++++++++++++++
rust/arrow/src/alloc/mod.rs | 136 ++++++++++++++++++
rust/arrow/src/alloc/types.rs | 175 +++++++++++++++++++++++
rust/arrow/src/array/array_list.rs | 6 +-
rust/arrow/src/array/raw_pointer.rs | 7 +-
rust/arrow/src/buffer/immutable.rs | 18 +--
rust/arrow/src/buffer/mutable.rs | 16 ++-
rust/arrow/src/bytes.rs | 4 +-
rust/arrow/src/lib.rs | 2 +-
rust/arrow/src/memory.rs | 277 ------------------------------------
10 files changed, 456 insertions(+), 304 deletions(-)
diff --git a/rust/arrow/src/alloc/alignment.rs
b/rust/arrow/src/alloc/alignment.rs
new file mode 100644
index 0000000..dbf4602
--- /dev/null
+++ b/rust/arrow/src/alloc/alignment.rs
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// NOTE: Below code is written for spatial/temporal prefetcher optimizations.
Memory allocation
+// should align well with usage pattern of cache access and block sizes on
layers of storage levels from
+// registers to non-volatile memory. These alignments are all cache aware
alignments incorporated
+// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach
mimicks Intel TBB's
+// cache_aligned_allocator which exploits cache locality and minimizes
prefetch signals
+// resulting in less round trip time between the layers of storage.
+// For further info: https://software.intel.com/en-us/node/506094
+
+// 32-bit architecture and things other than netburst microarchitecture are
using 64 bytes.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "x86")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// Intel x86_64:
+// L2D streamer from L1:
+// Loads data or instructions from memory to the second-level cache. To use
the streamer,
+// organize the data or instructions in blocks of 128 bytes, aligned on 128
bytes.
+// -
https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "x86_64")]
+pub const ALIGNMENT: usize = 1 << 7;
+
+// 24Kc:
+// Data Line Size
+// -
https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
+// -
https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "mips")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "mips64")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Defaults for powerpc
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "powerpc")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Defaults for the ppc 64
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "powerpc64")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// e.g.: sifive
+// -
https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
+// in general all of them are the same.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "riscv")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// This size is same across all hardware for this architecture.
+// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "s390x")]
+pub const ALIGNMENT: usize = 1 << 8;
+
+// This size is same across all hardware for this architecture.
+// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "sparc")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "sparc64")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// On ARM cache line sizes are fixed. both v6 and v7.
+// Need to add board specific or platform specific things later.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "thumbv6")]
+pub const ALIGNMENT: usize = 1 << 5;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "thumbv7")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Operating Systems cache size determines this.
+// Currently no way to determine this without runtime inference.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "wasm32")]
+pub const ALIGNMENT: usize = 1 << 6;
+
+// Same as v6 and v7.
+// List goes like that:
+// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "arm")]
+pub const ALIGNMENT: usize = 1 << 5;
+
+// Combined from 4 sectors. Volta says 128.
+// Prevent chunk optimizations better to go to the default size.
+// If you have smaller data with less padded functionality then use 32 with
force option.
+// -
https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "nvptx")]
+pub const ALIGNMENT: usize = 1 << 7;
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "nvptx64")]
+pub const ALIGNMENT: usize = 1 << 7;
+
+// This size is same across all hardware for this architecture.
+/// Cache and allocation multiple alignment size
+#[cfg(target_arch = "aarch64")]
+pub const ALIGNMENT: usize = 1 << 6;
diff --git a/rust/arrow/src/alloc/mod.rs b/rust/arrow/src/alloc/mod.rs
new file mode 100644
index 0000000..a225d32
--- /dev/null
+++ b/rust/arrow/src/alloc/mod.rs
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines memory-related functions, such as allocate/deallocate/reallocate
memory
+//! regions, cache and allocation alignments.
+
+use std::mem::size_of;
+use std::ptr::NonNull;
+use std::{
+ alloc::{handle_alloc_error, Layout},
+ sync::atomic::AtomicIsize,
+};
+
+mod alignment;
+mod types;
+
+pub use alignment::ALIGNMENT;
+pub use types::NativeType;
+
+// If this number is not zero after all objects have been `drop`, there is a
memory leak
+pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
+
+#[inline]
+unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
+ NonNull::new_unchecked(ALIGNMENT as *mut T)
+}
+
+/// Allocates a cache-aligned memory region of `size` bytes with uninitialized
values.
+/// This is more performant than using [allocate_aligned_zeroed] when all
bytes will have
+/// an unknown or non-zero value and is semantically similar to `malloc`.
+pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
+ unsafe {
+ if size == 0 {
+ null_pointer()
+ } else {
+ let size = size * size_of::<T>();
+ ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
+
+ let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
+ let raw_ptr = std::alloc::alloc(layout) as *mut T;
+ NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+ }
+ }
+}
+
+/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of
them.
+/// This is more performant than using [allocate_aligned] and setting all
bytes to zero
+/// and is semantically similar to `calloc`.
+pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
+ unsafe {
+ if size == 0 {
+ null_pointer()
+ } else {
+ let size = size * size_of::<T>();
+ ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
+
+ let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
+ let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
+ NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
+ }
+ }
+}
+
+/// # Safety
+///
+/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
+/// of the following:
+///
+/// * ptr must denote a block of memory currently allocated via this allocator,
+///
+/// * size must be the same size that was used to allocate that block of
memory,
+pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
+ if ptr != null_pointer() {
+ let size = size * size_of::<T>();
+ ALLOCATIONS.fetch_sub(size as isize,
std::sync::atomic::Ordering::SeqCst);
+ std::alloc::dealloc(
+ ptr.as_ptr() as *mut u8,
+ Layout::from_size_align_unchecked(size, ALIGNMENT),
+ );
+ }
+}
+
+/// # Safety
+///
+/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
+/// of the following:
+///
+/// * ptr must be currently allocated via this allocator,
+///
+/// * new_size must be greater than zero.
+///
+/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must
not overflow (i.e.,
+/// the rounded value must be less than usize::MAX).
+pub unsafe fn reallocate<T: NativeType>(
+ ptr: NonNull<T>,
+ old_size: usize,
+ new_size: usize,
+) -> NonNull<T> {
+ let old_size = old_size * size_of::<T>();
+ let new_size = new_size * size_of::<T>();
+ if ptr == null_pointer() {
+ return allocate_aligned(new_size);
+ }
+
+ if new_size == 0 {
+ free_aligned(ptr, old_size);
+ return null_pointer();
+ }
+
+ ALLOCATIONS.fetch_add(
+ new_size as isize - old_size as isize,
+ std::sync::atomic::Ordering::SeqCst,
+ );
+ let raw_ptr = std::alloc::realloc(
+ ptr.as_ptr() as *mut u8,
+ Layout::from_size_align_unchecked(old_size, ALIGNMENT),
+ new_size,
+ ) as *mut T;
+ NonNull::new(raw_ptr).unwrap_or_else(|| {
+ handle_alloc_error(Layout::from_size_align_unchecked(new_size,
ALIGNMENT))
+ })
+}
diff --git a/rust/arrow/src/alloc/types.rs b/rust/arrow/src/alloc/types.rs
new file mode 100644
index 0000000..0e177da
--- /dev/null
+++ b/rust/arrow/src/alloc/types.rs
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::datatypes::DataType;
+
+/// A type that Rust's custom allocator knows how to allocate and deallocate.
+/// This is implemented for all Arrow's physical types whose in-memory
representation
+/// matches Rust's physical types. Consider this trait sealed.
+/// # Safety
+/// Do not implement this trait.
+pub unsafe trait NativeType:
+ Sized + Copy + std::fmt::Debug + std::fmt::Display + PartialEq + Default +
Sized + 'static
+{
+ type Bytes: AsRef<[u8]>;
+
+ /// Whether a DataType is a valid type for this physical representation.
+ fn is_valid(data_type: &DataType) -> bool;
+
+ /// How this type represents itself as bytes in little endianess.
+ /// This is used for IPC, where data is communicated with a specific
endianess.
+ fn to_le_bytes(&self) -> Self::Bytes;
+}
+
+unsafe impl NativeType for u8 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt8
+ }
+}
+
+unsafe impl NativeType for u16 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt16
+ }
+}
+
+unsafe impl NativeType for u32 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt32
+ }
+}
+
+unsafe impl NativeType for u64 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::UInt64
+ }
+}
+
+unsafe impl NativeType for i8 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Int8
+ }
+}
+
+unsafe impl NativeType for i16 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Int16
+ }
+}
+
+unsafe impl NativeType for i32 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ matches!(
+ data_type,
+ DataType::Int32 | DataType::Date32 | DataType::Time32(_)
+ )
+ }
+}
+
+unsafe impl NativeType for i64 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ matches!(
+ data_type,
+ DataType::Int64
+ | DataType::Date64
+ | DataType::Time64(_)
+ | DataType::Timestamp(_, _)
+ )
+ }
+}
+
+unsafe impl NativeType for f32 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Float32
+ }
+}
+
+unsafe impl NativeType for f64 {
+ type Bytes = [u8; std::mem::size_of::<Self>()];
+ #[inline]
+ fn to_le_bytes(&self) -> Self::Bytes {
+ Self::to_le_bytes(*self)
+ }
+
+ #[inline]
+ fn is_valid(data_type: &DataType) -> bool {
+ data_type == &DataType::Float64
+ }
+}
diff --git a/rust/arrow/src/array/array_list.rs
b/rust/arrow/src/array/array_list.rs
index 8458836..f2076b3 100644
--- a/rust/arrow/src/array/array_list.rs
+++ b/rust/arrow/src/array/array_list.rs
@@ -378,12 +378,12 @@ impl fmt::Debug for FixedSizeListArray {
#[cfg(test)]
mod tests {
use crate::{
+ alloc,
array::ArrayData,
array::Int32Array,
buffer::Buffer,
datatypes::Field,
datatypes::{Int32Type, ToByteSlice},
- memory,
util::bit_util,
};
@@ -993,7 +993,7 @@ mod tests {
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
- let ptr = memory::allocate_aligned(8);
+ let ptr = alloc::allocate_aligned::<u8>(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
let array_data =
ArrayData::builder(DataType::Int32).add_buffer(buf2).build();
@@ -1003,7 +1003,7 @@ mod tests {
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_list_array_alignment() {
- let ptr = memory::allocate_aligned(8);
+ let ptr = alloc::allocate_aligned::<u8>(8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };
let buf2 = buf.slice(1);
diff --git a/rust/arrow/src/array/raw_pointer.rs
b/rust/arrow/src/array/raw_pointer.rs
index 897dc5b..185e1cb 100644
--- a/rust/arrow/src/array/raw_pointer.rs
+++ b/rust/arrow/src/array/raw_pointer.rs
@@ -15,7 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-use crate::memory;
use std::ptr::NonNull;
/// This struct is highly `unsafe` and offers the possibility to
self-reference a [arrow::Buffer] from [arrow::array::ArrayData].
@@ -36,7 +35,11 @@ impl<T> RawPtrBox<T> {
/// * `ptr` is not aligned to a slice of type `T`. This is guaranteed if
it was built from a slice of type `T`.
pub(super) unsafe fn new(ptr: *const u8) -> Self {
let ptr = NonNull::new(ptr as *mut u8).expect("Pointer cannot be
null");
- assert!(memory::is_ptr_aligned::<T>(ptr), "memory is not aligned");
+ assert_eq!(
+ ptr.as_ptr().align_offset(std::mem::align_of::<T>()),
+ 0,
+ "memory is not aligned"
+ );
Self { ptr: ptr.cast() }
}
diff --git a/rust/arrow/src/buffer/immutable.rs
b/rust/arrow/src/buffer/immutable.rs
index e96bc00..c09e4dd 100644
--- a/rust/arrow/src/buffer/immutable.rs
+++ b/rust/arrow/src/buffer/immutable.rs
@@ -21,9 +21,7 @@ use std::ptr::NonNull;
use std::sync::Arc;
use std::{convert::AsRef, usize};
-use crate::memory;
use crate::util::bit_chunk_iterator::BitChunks;
-use crate::util::bit_util;
use crate::{
bytes::{Bytes, Deallocation},
datatypes::ArrowNativeType,
@@ -56,19 +54,11 @@ impl Buffer {
/// Initializes a [Buffer] from a slice of items.
pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: &T) ->
Self {
- // allocate aligned memory buffer
let slice = items.as_ref();
- let len = slice.len() * std::mem::size_of::<U>();
- let capacity = bit_util::round_upto_multiple_of_64(len);
- let buffer = memory::allocate_aligned(capacity);
- unsafe {
- memory::memcpy(
- buffer,
- NonNull::new_unchecked(slice.as_ptr() as *mut u8),
- len,
- );
- Buffer::build_with_arguments(buffer, len,
Deallocation::Native(capacity))
- }
+ let len = slice.len();
+ let mut buffer = MutableBuffer::with_capacity(len);
+ buffer.extend_from_slice(slice);
+ buffer.into()
}
/// Creates a buffer from an existing memory region (must already be
byte-aligned), this
diff --git a/rust/arrow/src/buffer/mutable.rs b/rust/arrow/src/buffer/mutable.rs
index 9f0238f..ddc0501 100644
--- a/rust/arrow/src/buffer/mutable.rs
+++ b/rust/arrow/src/buffer/mutable.rs
@@ -1,9 +1,9 @@
use std::ptr::NonNull;
use crate::{
+ alloc,
bytes::{Bytes, Deallocation},
datatypes::{ArrowNativeType, ToByteSlice},
- memory,
util::bit_util,
};
@@ -53,8 +53,14 @@ impl MutableBuffer {
/// Allocate a new [MutableBuffer] with initial capacity to be at least
`capacity`.
#[inline]
pub fn new(capacity: usize) -> Self {
+ Self::with_capacity(capacity)
+ }
+
+ /// Allocate a new [MutableBuffer] with initial capacity to be at least
`capacity`.
+ #[inline]
+ pub fn with_capacity(capacity: usize) -> Self {
let capacity = bit_util::round_upto_multiple_of_64(capacity);
- let ptr = memory::allocate_aligned(capacity);
+ let ptr = alloc::allocate_aligned(capacity);
Self {
data: ptr,
len: 0,
@@ -75,7 +81,7 @@ impl MutableBuffer {
/// ```
pub fn from_len_zeroed(len: usize) -> Self {
let new_capacity = bit_util::round_upto_multiple_of_64(len);
- let ptr = memory::allocate_aligned_zeroed(new_capacity);
+ let ptr = alloc::allocate_aligned_zeroed(new_capacity);
Self {
data: ptr,
len,
@@ -324,7 +330,7 @@ unsafe fn reallocate(
) -> (NonNull<u8>, usize) {
let new_capacity = bit_util::round_upto_multiple_of_64(new_capacity);
let new_capacity = std::cmp::max(new_capacity, old_capacity * 2);
- let ptr = memory::reallocate(ptr, old_capacity, new_capacity);
+ let ptr = alloc::reallocate(ptr, old_capacity, new_capacity);
(ptr, new_capacity)
}
@@ -460,7 +466,7 @@ impl std::ops::DerefMut for MutableBuffer {
impl Drop for MutableBuffer {
fn drop(&mut self) {
- unsafe { memory::free_aligned(self.data, self.capacity) };
+ unsafe { alloc::free_aligned(self.data, self.capacity) };
}
}
diff --git a/rust/arrow/src/bytes.rs b/rust/arrow/src/bytes.rs
index 3236549..38fa443 100644
--- a/rust/arrow/src/bytes.rs
+++ b/rust/arrow/src/bytes.rs
@@ -24,7 +24,7 @@ use std::ptr::NonNull;
use std::sync::Arc;
use std::{fmt::Debug, fmt::Formatter};
-use crate::{ffi, memory};
+use crate::{alloc, ffi};
/// Mode of deallocating memory regions
pub enum Deallocation {
@@ -126,7 +126,7 @@ impl Drop for Bytes {
fn drop(&mut self) {
match &self.deallocation {
Deallocation::Native(capacity) => {
- unsafe { memory::free_aligned(self.ptr, *capacity) };
+ unsafe { alloc::free_aligned::<u8>(self.ptr, *capacity) };
}
// foreign interface knows how to deallocate itself.
Deallocation::Foreign(_) => (),
diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs
index c082d61..9c2ca27 100644
--- a/rust/arrow/src/lib.rs
+++ b/rust/arrow/src/lib.rs
@@ -135,6 +135,7 @@
// introduced to ignore lint errors when upgrading from 2020-04-22 to
2020-11-14
#![allow(clippy::float_equality_without_abs, clippy::type_complexity)]
+mod alloc;
mod arch;
pub mod array;
pub mod bitmap;
@@ -147,7 +148,6 @@ pub mod error;
pub mod ffi;
pub mod ipc;
pub mod json;
-pub mod memory;
pub mod record_batch;
pub mod temporal_conversions;
pub mod tensor;
diff --git a/rust/arrow/src/memory.rs b/rust/arrow/src/memory.rs
deleted file mode 100644
index 0ea8845..0000000
--- a/rust/arrow/src/memory.rs
+++ /dev/null
@@ -1,277 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Defines memory-related functions, such as allocate/deallocate/reallocate
memory
-//! regions, cache and allocation alignments.
-
-use std::mem::align_of;
-use std::ptr::NonNull;
-use std::{
- alloc::{handle_alloc_error, Layout},
- sync::atomic::AtomicIsize,
-};
-
-// NOTE: Below code is written for spatial/temporal prefetcher optimizations.
Memory allocation
-// should align well with usage pattern of cache access and block sizes on
layers of storage levels from
-// registers to non-volatile memory. These alignments are all cache aware
alignments incorporated
-// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach
mimicks Intel TBB's
-// cache_aligned_allocator which exploits cache locality and minimizes
prefetch signals
-// resulting in less round trip time between the layers of storage.
-// For further info: https://software.intel.com/en-us/node/506094
-
-// 32-bit architecture and things other than netburst microarchitecture are
using 64 bytes.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "x86")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// Intel x86_64:
-// L2D streamer from L1:
-// Loads data or instructions from memory to the second-level cache. To use
the streamer,
-// organize the data or instructions in blocks of 128 bytes, aligned on 128
bytes.
-// -
https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "x86_64")]
-pub const ALIGNMENT: usize = 1 << 7;
-
-// 24Kc:
-// Data Line Size
-// -
https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf
-// -
https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "mips")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "mips64")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Defaults for powerpc
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "powerpc")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Defaults for the ppc 64
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "powerpc64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// e.g.: sifive
-// -
https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
-// in general all of them are the same.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "riscv")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// This size is same across all hardware for this architecture.
-// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "s390x")]
-pub const ALIGNMENT: usize = 1 << 8;
-
-// This size is same across all hardware for this architecture.
-// -
https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "sparc")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "sparc64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-// On ARM cache line sizes are fixed. both v6 and v7.
-// Need to add board specific or platform specific things later.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "thumbv6")]
-pub const ALIGNMENT: usize = 1 << 5;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "thumbv7")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Operating Systems cache size determines this.
-// Currently no way to determine this without runtime inference.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "wasm32")]
-pub const ALIGNMENT: usize = FALLBACK_ALIGNMENT;
-
-// Same as v6 and v7.
-// List goes like that:
-// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "arm")]
-pub const ALIGNMENT: usize = 1 << 5;
-
-// Combined from 4 sectors. Volta says 128.
-// Prevent chunk optimizations better to go to the default size.
-// If you have smaller data with less padded functionality then use 32 with
force option.
-// -
https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "nvptx")]
-pub const ALIGNMENT: usize = 1 << 7;
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "nvptx64")]
-pub const ALIGNMENT: usize = 1 << 7;
-
-// This size is same across all hardware for this architecture.
-/// Cache and allocation multiple alignment size
-#[cfg(target_arch = "aarch64")]
-pub const ALIGNMENT: usize = 1 << 6;
-
-#[doc(hidden)]
-/// Fallback cache and allocation multiple alignment size
-const FALLBACK_ALIGNMENT: usize = 1 << 6;
-
-///
-/// As you can see this is global and lives as long as the program lives.
-/// Be careful to not write anything to this pointer in any scenario.
-/// If you use allocation methods shown here you won't have any problems.
-const BYPASS_PTR: NonNull<u8> = unsafe { NonNull::new_unchecked(ALIGNMENT as
*mut u8) };
-
-// If this number is not zero after all objects have been `drop`, there is a
memory leak
-pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);
-
-/// Allocates a cache-aligned memory region of `size` bytes with uninitialized
values.
-/// This is more performant than using [allocate_aligned_zeroed] when all
bytes will have
-/// an unknown or non-zero value and is semantically similar to `malloc`.
-pub fn allocate_aligned(size: usize) -> NonNull<u8> {
- unsafe {
- if size == 0 {
- // In a perfect world, there is no need to request zero size
allocation.
- // Currently, passing zero sized layout to alloc is UB.
- // This will dodge allocator api for any type.
- BYPASS_PTR
- } else {
- ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
-
- let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
- let raw_ptr = std::alloc::alloc(layout);
- NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
- }
- }
-}
-
-/// Allocates a cache-aligned memory region of `size` bytes with `0u8` on all
of them.
-/// This is more performant than using [allocate_aligned] and setting all
bytes to zero
-/// and is semantically similar to `calloc`.
-pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
- unsafe {
- if size == 0 {
- // In a perfect world, there is no need to request zero size
allocation.
- // Currently, passing zero sized layout to alloc is UB.
- // This will dodge allocator api for any type.
- BYPASS_PTR
- } else {
- ALLOCATIONS.fetch_add(size as isize,
std::sync::atomic::Ordering::SeqCst);
-
- let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
- let raw_ptr = std::alloc::alloc_zeroed(layout);
- NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
- }
- }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
-/// of the following:
-///
-/// * ptr must denote a block of memory currently allocated via this allocator,
-///
-/// * size must be the same size that was used to allocate that block of
memory,
-pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
- if ptr != BYPASS_PTR {
- ALLOCATIONS.fetch_sub(size as isize,
std::sync::atomic::Ordering::SeqCst);
- std::alloc::dealloc(
- ptr.as_ptr(),
- Layout::from_size_align_unchecked(size, ALIGNMENT),
- );
- }
-}
-
-/// # Safety
-///
-/// This function is unsafe because undefined behavior can result if the
caller does not ensure all
-/// of the following:
-///
-/// * ptr must be currently allocated via this allocator,
-///
-/// * new_size must be greater than zero.
-///
-/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must
not overflow (i.e.,
-/// the rounded value must be less than usize::MAX).
-pub unsafe fn reallocate(
- ptr: NonNull<u8>,
- old_size: usize,
- new_size: usize,
-) -> NonNull<u8> {
- if ptr == BYPASS_PTR {
- return allocate_aligned(new_size);
- }
-
- if new_size == 0 {
- free_aligned(ptr, old_size);
- return BYPASS_PTR;
- }
-
- ALLOCATIONS.fetch_add(
- new_size as isize - old_size as isize,
- std::sync::atomic::Ordering::SeqCst,
- );
- let raw_ptr = std::alloc::realloc(
- ptr.as_ptr(),
- Layout::from_size_align_unchecked(old_size, ALIGNMENT),
- new_size,
- );
- NonNull::new(raw_ptr).unwrap_or_else(|| {
- handle_alloc_error(Layout::from_size_align_unchecked(new_size,
ALIGNMENT))
- })
-}
-
-/// # Safety
-///
-/// Behavior is undefined if any of the following conditions are violated:
-///
-/// * `src` must be valid for reads of `len * size_of::<u8>()` bytes.
-///
-/// * `dst` must be valid for writes of `len * size_of::<u8>()` bytes.
-///
-/// * Both `src` and `dst` must be properly aligned.
-///
-/// `memcpy` creates a bitwise copy of `T`, regardless of whether `T` is
[`Copy`]. If `T` is not
-/// [`Copy`], using both the values in the region beginning at `*src` and the
region beginning at
-/// `*dst` can [violate memory safety][read-ownership].
-pub unsafe fn memcpy(dst: NonNull<u8>, src: NonNull<u8>, count: usize) {
- if src != BYPASS_PTR {
- std::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_ptr(), count)
- }
-}
-
-pub fn is_ptr_aligned<T>(p: NonNull<u8>) -> bool {
- p.as_ptr().align_offset(align_of::<T>()) == 0
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_allocate() {
- for _ in 0..10 {
- let p = allocate_aligned(1024);
- // make sure this is 64-byte aligned
- assert_eq!(0, (p.as_ptr() as usize) % 64);
- unsafe { free_aligned(p, 1024) };
- }
- }
-}