This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 29c376d  ARROW-2398: [Rust] Create Builder<T> for building buffers 
directly in aligned memory
29c376d is described below

commit 29c376de9f09f9ff2d18076241178a456f1279ef
Author: Andy Grove <andygrov...@gmail.com>
AuthorDate: Fri Apr 6 15:15:36 2018 +0200

    ARROW-2398: [Rust] Create Builder<T> for building buffers directly in 
aligned memory
    
    Also adds our first example
    
    Author: Andy Grove <andygrov...@gmail.com>
    
    Closes #1838 from andygrove/buffer_builder and squashes the following 
commits:
    
    940ee5e <Andy Grove> add missing file, also rustfmt again
    b649f3a <Andy Grove> add missing file
    e4347c1 <Andy Grove> move builder into separate file
    7fac96e <Andy Grove> rename Builder build() to finish()
    ee29eab <Andy Grove> examples
    00fe0da <Andy Grove> Improve examples, add support for creating Array from 
Buffer
    c1383a8 <Andy Grove> ran rustfmt using nightly
    89a9317 <Andy Grove> update README with real example
    3d68f9c <Andy Grove> Create Builder<T> for building buffers with zero-copy 
on build
---
 rust/README.md                                  |  38 +++++--
 rust/examples/array_from_builder.rs             |  49 +++++++++
 rust/{src/lib.rs => examples/array_from_vec.rs} |  24 +++--
 rust/src/array.rs                               |  21 +++-
 rust/src/buffer.rs                              |  12 ++-
 rust/src/builder.rs                             | 131 ++++++++++++++++++++++++
 rust/src/datatypes.rs                           |   4 +-
 rust/src/lib.rs                                 |   1 +
 8 files changed, 254 insertions(+), 26 deletions(-)

diff --git a/rust/README.md b/rust/README.md
index 20e4b73..2bbc7f1 100644
--- a/rust/README.md
+++ b/rust/README.md
@@ -25,17 +25,37 @@ This is a starting point for a native Rust implementation 
of Arrow.
 
 The current code demonstrates arrays of primitive types and structs.
 
-## Example
+## Creating an Array from a Vec
 
 ```rust
-let _schema = Schema::new(vec![
-    Field::new("a", DataType::Int32, false),
-    Field::new("b", DataType::Float32, false),
-]);
-
-let a = Rc::new(Array::from(vec![1,2,3,4,5]));
-let b = Rc::new(Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]));
-let _ = Rc::new(Array::from(vec![a,b]));
+// create a memory-aligned Arrow array from an existing Vec
+let array = Array::from(vec![1,2,3,4,5]);
+
+match array.data() {
+    &ArrayData::Int32(ref buffer) => {
+        println!("array contents: {:?}", buffer.iter().collect::<Vec<i32>>());
+    }
+    _ => {}
+}
+```
+
+## Creating an Array from a Builder
+
+```rust
+let mut builder: Builder<i32> = Builder::new();
+for i in 0..10 {
+    builder.push(i);
+}
+let buffer = builder.finish();
+let array = Array::from(buffer);
+```
+
+## Run Examples
+
+Examples can be run using the `cargo run --example` command. For example:
+
+```bash
+cargo run --example array_from_builder
 ```
 
 ## Run Tests
diff --git a/rust/examples/array_from_builder.rs 
b/rust/examples/array_from_builder.rs
new file mode 100644
index 0000000..3a273a6
--- /dev/null
+++ b/rust/examples/array_from_builder.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate arrow;
+
+use arrow::array::*;
+use arrow::buffer::*;
+use arrow::builder::*;
+
+fn main() {
+    let mut builder: Builder<i32> = Builder::new();
+    for i in 0..10 {
+        builder.push(i);
+    }
+    let buffer = builder.finish();
+
+    println!("buffer length: {}", buffer.len());
+    println!("buffer contents: {:?}", buffer.iter().collect::<Vec<i32>>());
+
+    // note that the builder can no longer be used once it has built a buffer, 
so either
+    // of the following calls will panic
+
+    //    builder.push(123);
+    //    builder.finish();
+
+    // create a memory-aligned Arrow array from the buffer (zero-copy)
+    let array = Array::from(buffer);
+
+    match array.data() {
+        &ArrayData::Int32(ref buffer) => {
+            println!("array contents: {:?}", 
buffer.iter().collect::<Vec<i32>>());
+        }
+        _ => {}
+    }
+}
diff --git a/rust/src/lib.rs b/rust/examples/array_from_vec.rs
similarity index 69%
copy from rust/src/lib.rs
copy to rust/examples/array_from_vec.rs
index bfcd83d..8cb4b26 100644
--- a/rust/src/lib.rs
+++ b/rust/examples/array_from_vec.rs
@@ -15,16 +15,18 @@
 // specific language governing permissions and limitations
 // under the License.
 
-extern crate bytes;
-extern crate libc;
+extern crate arrow;
 
-#[macro_use]
-extern crate serde_json;
+use arrow::array::*;
 
-pub mod array;
-pub mod bitmap;
-pub mod buffer;
-pub mod datatypes;
-pub mod error;
-pub mod list;
-pub mod memory;
+fn main() {
+    // create a memory-aligned Arrow array from an existing Vec
+    let array = Array::from(vec![1, 2, 3, 4, 5]);
+
+    match array.data() {
+        &ArrayData::Int32(ref buffer) => {
+            println!("array contents: {:?}", 
buffer.iter().collect::<Vec<i32>>());
+        }
+        _ => {}
+    }
+}
diff --git a/rust/src/array.rs b/rust/src/array.rs
index 1c0a653..09f0c95 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -42,12 +42,17 @@ pub enum ArrayData {
 }
 
 macro_rules! arraydata_from_primitive {
-    ($DT: ty, $AT: ident) => {
+    ($DT:ty, $AT:ident) => {
         impl From<Vec<$DT>> for ArrayData {
             fn from(v: Vec<$DT>) -> Self {
                 ArrayData::$AT(Buffer::from(v))
             }
         }
+        impl From<Buffer<$DT>> for ArrayData {
+            fn from(v: Buffer<$DT>) -> Self {
+                ArrayData::$AT(v)
+            }
+        }
     };
 }
 
@@ -91,7 +96,7 @@ impl Array {
 }
 
 macro_rules! array_from_primitive {
-    ($DT: ty) => {
+    ($DT:ty) => {
         impl From<Vec<$DT>> for Array {
             fn from(v: Vec<$DT>) -> Self {
                 Array {
@@ -102,6 +107,16 @@ macro_rules! array_from_primitive {
                 }
             }
         }
+        impl From<Buffer<$DT>> for Array {
+            fn from(v: Buffer<$DT>) -> Self {
+                Array {
+                    len: v.len() as i32,
+                    null_count: 0,
+                    validity_bitmap: None,
+                    data: ArrayData::from(v),
+                }
+            }
+        }
     };
 }
 
@@ -117,7 +132,7 @@ array_from_primitive!(i32);
 array_from_primitive!(i64);
 
 macro_rules! array_from_optional_primitive {
-    ($DT: ty, $DEFAULT: expr) => {
+    ($DT:ty, $DEFAULT:expr) => {
         impl From<Vec<Option<$DT>>> for Array {
             fn from(v: Vec<Option<$DT>>) -> Self {
                 let mut null_count = 0;
diff --git a/rust/src/buffer.rs b/rust/src/buffer.rs
index 517583e..ab90a5b 100644
--- a/rust/src/buffer.rs
+++ b/rust/src/buffer.rs
@@ -18,16 +18,23 @@
 use bytes::Bytes;
 use libc;
 use std::mem;
+use std::ptr;
 use std::slice;
 
 use super::memory::*;
 
+/// `Buffer<T>` is essentially a `Vec<T>` for fixed-width primitive types,
+/// whose memory region starts at a 64-byte aligned boundary
 pub struct Buffer<T> {
     data: *const T,
     len: i32,
 }
 
 impl<T> Buffer<T> {
+    pub fn from_raw_parts(data: *const T, len: i32) -> Self {
+        Buffer { data, len }
+    }
+
     pub fn len(&self) -> i32 {
         self.len
     }
@@ -43,10 +50,12 @@ impl<T> Buffer<T> {
         unsafe { slice::from_raw_parts(self.data.offset(start as isize), (end 
- start) as usize) }
     }
 
+    /// Get a reference to the value at the specified offset
     pub fn get(&self, i: usize) -> &T {
         unsafe { &(*self.data.offset(i as isize)) }
     }
 
+    /// Deprecated method (used by Bitmap)
     pub fn set(&mut self, i: usize, v: T) {
         unsafe {
             let p = mem::transmute::<*const T, *mut T>(self.data);
@@ -54,6 +63,7 @@ impl<T> Buffer<T> {
         }
     }
 
+    /// Return an iterator over the values in the buffer
     pub fn iter(&self) -> BufferIterator<T> {
         BufferIterator {
             data: self.data,
@@ -92,7 +102,7 @@ where
 }
 
 macro_rules! array_from_primitive {
-    ($DT: ty) => {
+    ($DT:ty) => {
         impl From<Vec<$DT>> for Buffer<$DT> {
             fn from(v: Vec<$DT>) -> Self {
                 // allocate aligned memory buffer
diff --git a/rust/src/builder.rs b/rust/src/builder.rs
new file mode 100644
index 0000000..1cc0240
--- /dev/null
+++ b/rust/src/builder.rs
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use bytes::Bytes;
+use libc;
+use std::mem;
+use std::ptr;
+use std::slice;
+
+use super::buffer::*;
+use super::memory::*;
+
+/// Buffer builder with a zero-copy finish() method
+pub struct Builder<T> {
+    data: *mut T,
+    len: usize,
+    capacity: usize,
+}
+
+impl<T> Builder<T> {
+    /// Creates a builder with a default capacity
+    pub fn new() -> Self {
+        Builder::with_capacity(64)
+    }
+
+    /// Creates a builder with the given initial capacity (number of elements)
+    pub fn with_capacity(capacity: usize) -> Self {
+        let sz = mem::size_of::<T>();
+        let buffer = allocate_aligned((capacity * sz) as i64).unwrap();
+        Builder {
+            len: 0,
+            capacity,
+            data: unsafe { mem::transmute::<*const u8, *mut T>(buffer) },
+        }
+    }
+
+    /// Push a value into the builder, growing the internal buffer as needed
+    pub fn push(&mut self, v: T) {
+        assert!(!self.data.is_null());
+        if self.len == self.capacity {
+            let sz = mem::size_of::<T>();
+            let new_capacity = self.capacity * 2;
+            unsafe {
+                let old_buffer = self.data;
+                let new_buffer = allocate_aligned((new_capacity * sz) as 
i64).unwrap();
+                libc::memcpy(
+                    mem::transmute::<*const u8, *mut libc::c_void>(new_buffer),
+                    mem::transmute::<*const T, *const 
libc::c_void>(old_buffer),
+                    self.len * sz,
+                );
+                self.capacity = new_capacity;
+                self.data = mem::transmute::<*const u8, *mut T>(new_buffer);
+                mem::drop(old_buffer);
+            }
+        }
+        assert!(self.len < self.capacity);
+        unsafe {
+            *self.data.offset(self.len as isize) = v;
+        }
+        self.len += 1;
+    }
+
+    /// Build a Buffer from the existing memory
+    pub fn finish(&mut self) -> Buffer<T> {
+        assert!(!self.data.is_null());
+        let p = unsafe { mem::transmute::<*mut T, *const T>(self.data) };
+        self.data = ptr::null_mut(); // ensure builder cannot be re-used
+        Buffer::from_raw_parts(p, self.len as i32)
+    }
+}
+
+impl<T> Drop for Builder<T> {
+    fn drop(&mut self) {
+        if !self.data.is_null() {
+            mem::drop(self.data)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_builder_i32_empty() {
+        let mut b: Builder<i32> = Builder::with_capacity(5);
+        let a = b.finish();
+        assert_eq!(0, a.len());
+    }
+
+    #[test]
+    fn test_builder_i32() {
+        let mut b: Builder<i32> = Builder::with_capacity(5);
+        for i in 0..5 {
+            b.push(i);
+        }
+        let a = b.finish();
+        assert_eq!(5, a.len());
+        for i in 0..5 {
+            assert_eq!(&i, a.get(i as usize));
+        }
+    }
+
+    #[test]
+    fn test_builder_i32_grow_buffer() {
+        let mut b: Builder<i32> = Builder::with_capacity(2);
+        for i in 0..5 {
+            b.push(i);
+        }
+        let a = b.finish();
+        assert_eq!(5, a.len());
+        for i in 0..5 {
+            assert_eq!(&i, a.get(i as usize));
+        }
+    }
+
+}
diff --git a/rust/src/datatypes.rs b/rust/src/datatypes.rs
index 1e1afe6..85278f7 100644
--- a/rust/src/datatypes.rs
+++ b/rust/src/datatypes.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::fmt;
+use super::error::ArrowError;
 use serde_json;
 use serde_json::Value;
-use super::error::ArrowError;
+use std::fmt;
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum DataType {
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index bfcd83d..6ab3daa 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -24,6 +24,7 @@ extern crate serde_json;
 pub mod array;
 pub mod bitmap;
 pub mod buffer;
+pub mod builder;
 pub mod datatypes;
 pub mod error;
 pub mod list;

-- 
To stop receiving notification emails like this one, please contact
u...@apache.org.

Reply via email to