[ 
https://issues.apache.org/jira/browse/ARROW-2398?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16428292#comment-16428292
 ] 

ASF GitHub Bot commented on ARROW-2398:
---------------------------------------

xhochy closed pull request #1838: ARROW-2398: [Rust] Create Builder<T> for 
building buffers directly in aligned memory
URL: https://github.com/apache/arrow/pull/1838
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/rust/README.md b/rust/README.md
index 20e4b73f1..2bbc7f104 100644
--- a/rust/README.md
+++ b/rust/README.md
@@ -25,17 +25,37 @@ This is a starting point for a native Rust implementation of Arrow.
 
 The current code demonstrates arrays of primitive types and structs.
 
-## Example
+## Creating an Array from a Vec
 
 ```rust
-let _schema = Schema::new(vec![
-    Field::new("a", DataType::Int32, false),
-    Field::new("b", DataType::Float32, false),
-]);
-
-let a = Rc::new(Array::from(vec![1,2,3,4,5]));
-let b = Rc::new(Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]));
-let _ = Rc::new(Array::from(vec![a,b]));
+// create a memory-aligned Arrow array from an existing Vec
+let array = Array::from(vec![1,2,3,4,5]);
+
+match array.data() {
+    &ArrayData::Int32(ref buffer) => {
+        println!("array contents: {:?}", buffer.iter().collect::<Vec<i32>>());
+    }
+    _ => {}
+}
+```
+
+## Creating an Array from a Builder
+
+```rust
+let mut builder: Builder<i32> = Builder::new();
+for i in 0..10 {
+    builder.push(i);
+}
+let buffer = builder.finish();
+let array = Array::from(buffer);
+```
+
+## Run Examples
+
+Examples can be run using the `cargo run --example` command. For example:
+
+```bash
+cargo run --example array_from_builder
 ```
 
 ## Run Tests
diff --git a/rust/examples/array_from_builder.rs b/rust/examples/array_from_builder.rs
new file mode 100644
index 000000000..3a273a64d
--- /dev/null
+++ b/rust/examples/array_from_builder.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate arrow;
+
+use arrow::array::*;
+use arrow::buffer::*;
+use arrow::builder::*;
+
+fn main() {
+    let mut builder: Builder<i32> = Builder::new();
+    for i in 0..10 {
+        builder.push(i);
+    }
+    let buffer = builder.finish();
+
+    println!("buffer length: {}", buffer.len());
+    println!("buffer contents: {:?}", buffer.iter().collect::<Vec<i32>>());
+
+    // note that the builder can no longer be used once it has built a buffer, so either
+    // of the following calls will fail
+
+    //    builder.push(123);
+    //    builder.build();
+
+    // create a memory-aligned Arrow from the builder (zero-copy)
+    let array = Array::from(buffer);
+
+    match array.data() {
+        &ArrayData::Int32(ref buffer) => {
+            println!("array contents: {:?}", buffer.iter().collect::<Vec<i32>>());
+        }
+        _ => {}
+    }
+}
diff --git a/rust/examples/array_from_vec.rs b/rust/examples/array_from_vec.rs
new file mode 100644
index 000000000..8cb4b268f
--- /dev/null
+++ b/rust/examples/array_from_vec.rs
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate arrow;
+
+use arrow::array::*;
+
+fn main() {
+    // create a memory-aligned Arrow array from an existing Vec
+    let array = Array::from(vec![1, 2, 3, 4, 5]);
+
+    match array.data() {
+        &ArrayData::Int32(ref buffer) => {
+            println!("array contents: {:?}", buffer.iter().collect::<Vec<i32>>());
+        }
+        _ => {}
+    }
+}
diff --git a/rust/src/array.rs b/rust/src/array.rs
index 1c0a653d7..09f0c9500 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -42,12 +42,17 @@ pub enum ArrayData {
 }
 
 macro_rules! arraydata_from_primitive {
-    ($DT: ty, $AT: ident) => {
+    ($DT:ty, $AT:ident) => {
         impl From<Vec<$DT>> for ArrayData {
             fn from(v: Vec<$DT>) -> Self {
                 ArrayData::$AT(Buffer::from(v))
             }
         }
+        impl From<Buffer<$DT>> for ArrayData {
+            fn from(v: Buffer<$DT>) -> Self {
+                ArrayData::$AT(v)
+            }
+        }
     };
 }
 
@@ -91,7 +96,7 @@ impl Array {
 }
 
 macro_rules! array_from_primitive {
-    ($DT: ty) => {
+    ($DT:ty) => {
         impl From<Vec<$DT>> for Array {
             fn from(v: Vec<$DT>) -> Self {
                 Array {
@@ -102,6 +107,16 @@ macro_rules! array_from_primitive {
                 }
             }
         }
+        impl From<Buffer<$DT>> for Array {
+            fn from(v: Buffer<$DT>) -> Self {
+                Array {
+                    len: v.len() as i32,
+                    null_count: 0,
+                    validity_bitmap: None,
+                    data: ArrayData::from(v),
+                }
+            }
+        }
     };
 }
 
@@ -117,7 +132,7 @@ array_from_primitive!(i32);
 array_from_primitive!(i64);
 
 macro_rules! array_from_optional_primitive {
-    ($DT: ty, $DEFAULT: expr) => {
+    ($DT:ty, $DEFAULT:expr) => {
         impl From<Vec<Option<$DT>>> for Array {
             fn from(v: Vec<Option<$DT>>) -> Self {
                 let mut null_count = 0;
diff --git a/rust/src/buffer.rs b/rust/src/buffer.rs
index 517583e90..ab90a5b08 100644
--- a/rust/src/buffer.rs
+++ b/rust/src/buffer.rs
@@ -18,16 +18,23 @@
 use bytes::Bytes;
 use libc;
 use std::mem;
+use std::ptr;
 use std::slice;
 
 use super::memory::*;
 
+/// Buffer<T> is essentially just a Vec<T> for fixed-width primitive types and the start of the
+/// memory region is aligned at a 64-byte boundary
 pub struct Buffer<T> {
     data: *const T,
     len: i32,
 }
 
 impl<T> Buffer<T> {
+    pub fn from_raw_parts(data: *const T, len: i32) -> Self {
+        Buffer { data, len }
+    }
+
     pub fn len(&self) -> i32 {
         self.len
     }
@@ -43,10 +50,12 @@ impl<T> Buffer<T> {
         unsafe { slice::from_raw_parts(self.data.offset(start as isize), (end - start) as usize) }
     }
 
+    /// Get a reference to the value at the specified offset
     pub fn get(&self, i: usize) -> &T {
         unsafe { &(*self.data.offset(i as isize)) }
     }
 
+    /// Deprecated method (used by Bitmap)
     pub fn set(&mut self, i: usize, v: T) {
         unsafe {
             let p = mem::transmute::<*const T, *mut T>(self.data);
@@ -54,6 +63,7 @@ impl<T> Buffer<T> {
         }
     }
 
+    /// Return an iterator over the values in the buffer
     pub fn iter(&self) -> BufferIterator<T> {
         BufferIterator {
             data: self.data,
@@ -92,7 +102,7 @@ where
 }
 
 macro_rules! array_from_primitive {
-    ($DT: ty) => {
+    ($DT:ty) => {
         impl From<Vec<$DT>> for Buffer<$DT> {
             fn from(v: Vec<$DT>) -> Self {
                 // allocate aligned memory buffer
diff --git a/rust/src/builder.rs b/rust/src/builder.rs
new file mode 100644
index 000000000..1cc024042
--- /dev/null
+++ b/rust/src/builder.rs
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use bytes::Bytes;
+use libc;
+use std::mem;
+use std::ptr;
+use std::slice;
+
+use super::buffer::*;
+use super::memory::*;
+
+/// Buffer builder with zero-copy build method
+pub struct Builder<T> {
+    data: *mut T,
+    len: usize,
+    capacity: usize,
+}
+
+impl<T> Builder<T> {
+    /// Creates a builder with a default capacity
+    pub fn new() -> Self {
+        Builder::with_capacity(64)
+    }
+
+    /// Creates a builder with a fixed capacity
+    pub fn with_capacity(capacity: usize) -> Self {
+        let sz = mem::size_of::<T>();
+        let buffer = allocate_aligned((capacity * sz) as i64).unwrap();
+        Builder {
+            len: 0,
+            capacity,
+            data: unsafe { mem::transmute::<*const u8, *mut T>(buffer) },
+        }
+    }
+
+    /// Push a value into the builder, growing the internal buffer as needed
+    pub fn push(&mut self, v: T) {
+        assert!(!self.data.is_null());
+        if self.len == self.capacity {
+            let sz = mem::size_of::<T>();
+            let new_capacity = self.capacity * 2;
+            unsafe {
+                let old_buffer = self.data;
+                let new_buffer = allocate_aligned((new_capacity * sz) as i64).unwrap();
+                libc::memcpy(
+                    mem::transmute::<*const u8, *mut libc::c_void>(new_buffer),
+                    mem::transmute::<*const T, *const libc::c_void>(old_buffer),
+                    self.len * sz,
+                );
+                self.capacity = new_capacity;
+                self.data = mem::transmute::<*const u8, *mut T>(new_buffer);
+                mem::drop(old_buffer);
+            }
+        }
+        assert!(self.len < self.capacity);
+        unsafe {
+            *self.data.offset(self.len as isize) = v;
+        }
+        self.len += 1;
+    }
+
+    /// Build a Buffer from the existing memory
+    pub fn finish(&mut self) -> Buffer<T> {
+        assert!(!self.data.is_null());
+        let p = unsafe { mem::transmute::<*mut T, *const T>(self.data) };
+        self.data = ptr::null_mut(); // ensure builder cannot be re-used
+        Buffer::from_raw_parts(p, self.len as i32)
+    }
+}
+
+impl<T> Drop for Builder<T> {
+    fn drop(&mut self) {
+        if !self.data.is_null() {
+            mem::drop(self.data)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_builder_i32_empty() {
+        let mut b: Builder<i32> = Builder::with_capacity(5);
+        let a = b.finish();
+        assert_eq!(0, a.len());
+    }
+
+    #[test]
+    fn test_builder_i32() {
+        let mut b: Builder<i32> = Builder::with_capacity(5);
+        for i in 0..5 {
+            b.push(i);
+        }
+        let a = b.finish();
+        assert_eq!(5, a.len());
+        for i in 0..5 {
+            assert_eq!(&i, a.get(i as usize));
+        }
+    }
+
+    #[test]
+    fn test_builder_i32_grow_buffer() {
+        let mut b: Builder<i32> = Builder::with_capacity(2);
+        for i in 0..5 {
+            b.push(i);
+        }
+        let a = b.finish();
+        assert_eq!(5, a.len());
+        for i in 0..5 {
+            assert_eq!(&i, a.get(i as usize));
+        }
+    }
+
+}
diff --git a/rust/src/datatypes.rs b/rust/src/datatypes.rs
index 1e1afe64b..85278f7bb 100644
--- a/rust/src/datatypes.rs
+++ b/rust/src/datatypes.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::fmt;
+use super::error::ArrowError;
 use serde_json;
 use serde_json::Value;
-use super::error::ArrowError;
+use std::fmt;
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum DataType {
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index bfcd83dc5..6ab3daabb 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -24,6 +24,7 @@ extern crate serde_json;
 pub mod array;
 pub mod bitmap;
 pub mod buffer;
+pub mod builder;
 pub mod datatypes;
 pub mod error;
 pub mod list;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Rust] Provide a zero-copy builder for type-safe Buffer<T>
> ----------------------------------------------------------
>
>                 Key: ARROW-2398
>                 URL: https://issues.apache.org/jira/browse/ARROW-2398
>             Project: Apache Arrow
>          Issue Type: New Feature
>          Components: Rust
>            Reporter: Andy Grove
>            Assignee: Andy Grove
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> This PR implements a builder so that buffers can be populated directly in 
> aligned memory (as opposed to being created from Vec<T>).
>  
> https://github.com/apache/arrow/pull/1838



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to