This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 894dd17  ARROW-12043: [Rust] [Parquet] Write FSB arrays
894dd17 is described below

commit 894dd17c9602439c2b84c0b849fb0966606ceb1c
Author: Neville Dipale <[email protected]>
AuthorDate: Sun Mar 28 11:01:56 2021 +0200

    ARROW-12043: [Rust] [Parquet] Write FSB arrays
    
    Minor change to compute the levels for FixedSizeBinary (FSB) arrays and
    write them out to Parquet. Added a roundtrip test.
    
    Closes #9771 from nevi-me/ARROW-12043
    
    Authored-by: Neville Dipale <[email protected]>
    Signed-off-by: Neville Dipale <[email protected]>
---
 rust/parquet/src/arrow/arrow_writer.rs | 28 ++++++++++++++++++++++------
 rust/parquet/src/arrow/levels.rs       | 30 ++++++++++++++++++++----------
 rust/parquet/src/arrow/mod.rs          |  2 +-
 3 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index 1ce907f..a3577ca 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -146,7 +146,8 @@ fn write_leaves(
         | ArrowDataType::Binary
         | ArrowDataType::Utf8
         | ArrowDataType::LargeUtf8
-        | ArrowDataType::Decimal(_, _) => {
+        | ArrowDataType::Decimal(_, _)
+        | ArrowDataType::FixedSizeBinary(_) => {
             let mut col_writer = get_col_writer(&mut row_group_writer)?;
             write_leaf(
                 &mut col_writer,
@@ -189,11 +190,14 @@ fn write_leaves(
         ArrowDataType::Float16 => Err(ParquetError::ArrowError(
             "Float16 arrays not supported".to_string(),
         )),
-        ArrowDataType::FixedSizeList(_, _)
-        | ArrowDataType::FixedSizeBinary(_)
-        | ArrowDataType::Union(_) => Err(ParquetError::NYI(
-            "Attempting to write an Arrow type that is not yet 
implemented".to_string(),
-        )),
+        ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => {
+            Err(ParquetError::NYI(
+                format!(
+                    "Attempting to write an Arrow type {:?} to parquet that is 
not yet implemented", 
+                    array.data_type()
+                )
+            ))
+        }
     }
 }
 
@@ -1225,6 +1229,18 @@ mod tests {
     }
 
     #[test]
+    fn fixed_size_binary_single_column() {
+        let mut builder = FixedSizeBinaryBuilder::new(16, 4);
+        builder.append_value(b"0123").unwrap();
+        builder.append_null().unwrap();
+        builder.append_value(b"8910").unwrap();
+        builder.append_value(b"1112").unwrap();
+        let array = Arc::new(builder.finish());
+
+        one_column_roundtrip("timestamp_millisecond_single_column", array, 
true);
+    }
+
+    #[test]
     fn string_single_column() {
         let raw_values: Vec<_> = (0..SMALL_SIZE).map(|i| 
i.to_string()).collect();
         let raw_strs = raw_values.iter().map(|s| s.as_str());
diff --git a/rust/parquet/src/arrow/levels.rs b/rust/parquet/src/arrow/levels.rs
index 641e330..2168670 100644
--- a/rust/parquet/src/arrow/levels.rs
+++ b/rust/parquet/src/arrow/levels.rs
@@ -136,7 +136,8 @@ impl LevelInfo {
             | DataType::Interval(_)
             | DataType::Binary
             | DataType::LargeBinary
-            | DataType::Decimal(_, _) => {
+            | DataType::Decimal(_, _)
+            | DataType::FixedSizeBinary(_) => {
                 // we return a vector of 1 value to represent the primitive
                 vec![self.calculate_child_levels(
                     array_offsets,
@@ -145,7 +146,6 @@ impl LevelInfo {
                     field.is_nullable(),
                 )]
             }
-            DataType::FixedSizeBinary(_) => unimplemented!(),
             DataType::List(list_field) | DataType::LargeList(list_field) => {
                 // Calculate the list level
                 let list_level = self.calculate_child_levels(
@@ -189,7 +189,8 @@ impl LevelInfo {
                     | DataType::Utf8
                     | DataType::LargeUtf8
                     | DataType::Dictionary(_, _)
-                    | DataType::Decimal(_, _) => {
+                    | DataType::Decimal(_, _)
+                    | DataType::FixedSizeBinary(_) => {
                         vec![list_level.calculate_child_levels(
                             child_offsets,
                             child_mask,
@@ -197,7 +198,6 @@ impl LevelInfo {
                             list_field.is_nullable(),
                         )]
                     }
-                    DataType::FixedSizeBinary(_) => unimplemented!(),
                     DataType::List(_) | DataType::LargeList(_) | 
DataType::Struct(_) => {
                         list_level.calculate_array_levels(&child_array, 
list_field)
                     }
@@ -297,9 +297,10 @@ impl LevelInfo {
         is_list: bool,
         is_nullable: bool,
     ) -> Self {
-        let mut definition = vec![];
-        let mut repetition = vec![];
-        let mut merged_array_mask = vec![];
+        let min_len = *(array_offsets.last().unwrap()) as usize;
+        let mut definition = Vec::with_capacity(min_len);
+        let mut repetition = Vec::with_capacity(min_len);
+        let mut merged_array_mask = Vec::with_capacity(min_len);
 
         // determine the total level increment based on data types
         let max_definition = match is_list {
@@ -624,9 +625,18 @@ impl LevelInfo {
                 let masks = offsets.windows(2).map(|w| w[1] > w[0]).collect();
                 (offsets, masks)
             }
-            DataType::FixedSizeBinary(_)
-            | DataType::FixedSizeList(_, _)
-            | DataType::Union(_) => {
+            DataType::FixedSizeBinary(value_len) => {
+                let array_mask = match array.data().null_buffer() {
+                    Some(buf) => get_bool_array_slice(buf, array.offset(), 
array.len()),
+                    None => vec![true; array.len()],
+                };
+                let value_len = *value_len as i64;
+                (
+                    (0..=(array.len() as i64)).map(|v| v * 
value_len).collect(),
+                    array_mask,
+                )
+            }
+            DataType::FixedSizeList(_, _) | DataType::Union(_) => {
                 unimplemented!("Getting offsets not yet implemented")
             }
         }
diff --git a/rust/parquet/src/arrow/mod.rs b/rust/parquet/src/arrow/mod.rs
index 9095259..b1aa39e 100644
--- a/rust/parquet/src/arrow/mod.rs
+++ b/rust/parquet/src/arrow/mod.rs
@@ -53,7 +53,7 @@ pub(in crate::arrow) mod array_reader;
 pub mod arrow_reader;
 pub mod arrow_writer;
 pub(in crate::arrow) mod converter;
-pub mod levels;
+pub(in crate::arrow) mod levels;
 pub(in crate::arrow) mod record_reader;
 pub mod schema;
 

Reply via email to