This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 7bf7ea5e3 Added support for byte vectors and slices to parquet_derive (#3864) (#3878)
7bf7ea5e3 is described below

commit 7bf7ea5e341c15dbd8653b16413459f5fa4784eb
Author: waymost <[email protected]>
AuthorDate: Sat Mar 18 05:02:31 2023 -0700

    Added support for byte vectors and slices to parquet_derive (#3864) (#3878)
---
 parquet/src/data_type.rs            | 10 +++++
 parquet_derive/src/parquet_field.rs | 76 ++++++++++++++++++++++++++++++++-----
 parquet_derive_test/src/lib.rs      | 18 +++++++++
 3 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 40d54c78e..48ee7f89f 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -199,6 +199,16 @@ impl From<Vec<u8>> for ByteArray {
     }
 }
 
+impl<'a> From<&'a [u8]> for ByteArray {
+    fn from(b: &'a [u8]) -> ByteArray {
+        let mut v = Vec::new();
+        v.extend_from_slice(b);
+        Self {
+            data: Some(ByteBufferPtr::new(v)),
+        }
+    }
+}
+
 impl<'a> From<&'a str> for ByteArray {
     fn from(s: &'a str) -> ByteArray {
         let mut v = Vec::new();
diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs
index 48b6d3ac4..ea6878283 100644
--- a/parquet_derive/src/parquet_field.rs
+++ b/parquet_derive/src/parquet_field.rs
@@ -92,6 +92,10 @@ impl Field {
                     Type::TypePath(_) => self.option_into_vals(),
                     _ => unimplemented!("Unsupported type encountered"),
                 },
+                Type::Vec(ref first_type) => match **first_type {
+                    Type::TypePath(_) => self.option_into_vals(),
+                    _ => unimplemented!("Unsupported type encountered"),
+                },
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
             Type::Reference(_, ref first_type) => match **first_type {
@@ -99,11 +103,27 @@ impl Field {
                 Type::Option(ref second_type) => match **second_type {
                     Type::TypePath(_) => self.option_into_vals(),
                     Type::Reference(_, ref second_type) => match **second_type {
+                        Type::TypePath(_) => self.option_into_vals(),
+                        Type::Slice(ref second_type) => match **second_type {
+                            Type::TypePath(_) => self.option_into_vals(),
+                            ref f => unimplemented!("Unsupported: {:#?}", f),
+                        },
+                        _ => unimplemented!("Unsupported type encountered"),
+                    },
+                    Type::Vec(ref first_type) => match **first_type {
                         Type::TypePath(_) => self.option_into_vals(),
                         _ => unimplemented!("Unsupported type encountered"),
                     },
                     ref f => unimplemented!("Unsupported: {:#?}", f),
                 },
+                Type::Slice(ref second_type) => match **second_type {
+                    Type::TypePath(_) => self.copied_direct_vals(),
+                    ref f => unimplemented!("Unsupported: {:#?}", f),
+                },
+                ref f => unimplemented!("Unsupported: {:#?}", f),
+            },
+            Type::Vec(ref first_type) => match **first_type {
+                Type::TypePath(_) => self.copied_direct_vals(),
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
             f => unimplemented!("Unsupported: {:#?}", f),
@@ -116,26 +136,55 @@ impl Field {
                 Type::Option(_) => unimplemented!("Unsupported nesting encountered"),
                 Type::Reference(_, ref second_type)
                 | Type::Vec(ref second_type)
-                | Type::Array(ref second_type) => match **second_type {
+                | Type::Array(ref second_type)
+                | Type::Slice(ref second_type) => match **second_type {
                     Type::TypePath(_) => Some(self.optional_definition_levels()),
                     _ => unimplemented!("Unsupported nesting encountered"),
                 },
             },
             Type::Reference(_, ref first_type)
             | Type::Vec(ref first_type)
-            | Type::Array(ref first_type) => match **first_type {
+            | Type::Array(ref first_type)
+            | Type::Slice(ref first_type) => match **first_type {
                 Type::TypePath(_) => None,
-                Type::Reference(_, ref second_type)
-                | Type::Vec(ref second_type)
+                Type::Vec(ref second_type)
                 | Type::Array(ref second_type)
-                | Type::Option(ref second_type) => match **second_type {
-                    Type::TypePath(_) => Some(self.optional_definition_levels()),
+                | Type::Slice(ref second_type) => match **second_type {
+                    Type::TypePath(_) => None,
                     Type::Reference(_, ref third_type) => match **third_type {
-                        Type::TypePath(_) => Some(self.optional_definition_levels()),
+                        Type::TypePath(_) => None,
                         _ => unimplemented!("Unsupported definition encountered"),
                     },
                     _ => unimplemented!("Unsupported definition encountered"),
                 },
+                Type::Reference(_, ref second_type) | Type::Option(ref second_type) => {
+                    match **second_type {
+                        Type::TypePath(_) => Some(self.optional_definition_levels()),
+                        Type::Vec(ref third_type)
+                        | Type::Array(ref third_type)
+                        | Type::Slice(ref third_type) => match **third_type {
+                            Type::TypePath(_) => Some(self.optional_definition_levels()),
+                            Type::Reference(_, ref fourth_type) => match **fourth_type {
+                                Type::TypePath(_) => {
+                                    Some(self.optional_definition_levels())
+                                }
+                                _ => unimplemented!("Unsupported definition encountered"),
+                            },
+                            _ => unimplemented!("Unsupported definition encountered"),
+                        },
+                        Type::Reference(_, ref third_type) => match **third_type {
+                            Type::TypePath(_) => Some(self.optional_definition_levels()),
+                            Type::Slice(ref fourth_type) => match **fourth_type {
+                                Type::TypePath(_) => {
+                                    Some(self.optional_definition_levels())
+                                }
+                                _ => unimplemented!("Unsupported definition encountered"),
+                            },
+                            _ => unimplemented!("Unsupported definition encountered"),
+                        },
+                        _ => unimplemented!("Unsupported definition encountered"),
+                    }
+                }
             },
         };
 
@@ -323,6 +372,7 @@ impl Field {
 enum Type {
     Array(Box<Type>),
     Option(Box<Type>),
+    Slice(Box<Type>),
     Vec(Box<Type>),
     TypePath(syn::Type),
     Reference(Option<syn::Lifetime>, Box<Type>),
@@ -374,6 +424,7 @@ impl Type {
             Type::Option(ref first_type)
             | Type::Vec(ref first_type)
             | Type::Array(ref first_type)
+            | Type::Slice(ref first_type)
             | Type::Reference(_, ref first_type) => {
                 Type::leaf_type_recursive_helper(first_type, Some(ty))
             }
@@ -391,6 +442,7 @@ impl Type {
             Type::Option(ref first_type)
             | Type::Vec(ref first_type)
             | Type::Array(ref first_type)
+            | Type::Slice(ref first_type)
             | Type::Reference(_, ref first_type) => match **first_type {
                 Type::TypePath(ref type_) => type_,
                 _ => unimplemented!("leaf_type() should only return shallow types"),
@@ -443,7 +495,7 @@ impl Type {
                     }
                 }
             }
-            Type::Vec(ref first_type) => {
+            Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
                 if let Type::TypePath(_) = **first_type {
                     if last_part == "u8" {
                         return BasicType::BYTE_ARRAY;
@@ -484,7 +536,7 @@ impl Type {
                     }
                 }
             }
-            Type::Vec(ref first_type) => {
+            Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
                 if let Type::TypePath(_) = **first_type {
                     if last_part == "u8" {
                         return quote! { None };
@@ -572,6 +624,7 @@ impl Type {
             syn::Type::Path(ref p) => Type::from_type_path(f, p),
             syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr),
             syn::Type::Array(ref ta) => Type::from_type_array(f, ta),
+            syn::Type::Slice(ref ts) => Type::from_type_slice(f, ts),
             other => unimplemented!(
                 "Unable to derive {:?} - it is currently an unsupported type\n{:#?}",
                 f.ident.as_ref().unwrap(),
@@ -622,6 +675,11 @@ impl Type {
         let inner_type = Type::from_type(f, ta.elem.as_ref());
         Type::Array(Box::new(inner_type))
     }
+
+    fn from_type_slice(f: &syn::Field, ts: &syn::TypeSlice) -> Self {
+        let inner_type = Type::from_type(f, ts.elem.as_ref());
+        Type::Slice(Box::new(inner_type))
+    }
 }
 
 #[cfg(test)]
diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs
index 746644793..2aa174974 100644
--- a/parquet_derive_test/src/lib.rs
+++ b/parquet_derive_test/src/lib.rs
@@ -42,6 +42,11 @@ struct ACompleteRecord<'a> {
     pub borrowed_maybe_a_string: &'a Option<String>,
     pub borrowed_maybe_a_str: &'a Option<&'a str>,
     pub now: chrono::NaiveDateTime,
+    pub byte_vec: Vec<u8>,
+    pub maybe_byte_vec: Option<Vec<u8>>,
+    pub borrowed_byte_vec: &'a [u8],
+    pub borrowed_maybe_byte_vec: &'a Option<Vec<u8>>,
+    pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>,
 }
 
 #[cfg(test)]
@@ -84,6 +89,11 @@ mod tests {
             OPTIONAL BINARY          borrowed_maybe_a_string (STRING);
             OPTIONAL BINARY          borrowed_maybe_a_str (STRING);
             REQUIRED INT64           now (TIMESTAMP_MILLIS);
+            REQUIRED BINARY          byte_vec;
+            OPTIONAL BINARY          maybe_byte_vec;
+            REQUIRED BINARY          borrowed_byte_vec;
+            OPTIONAL BINARY          borrowed_maybe_byte_vec;
+            OPTIONAL BINARY          borrowed_maybe_borrowed_byte_vec;
         }";
 
         let schema = Arc::new(parse_message_type(schema_str).unwrap());
@@ -92,6 +102,9 @@ mod tests {
         let a_borrowed_string = "cool news".to_owned();
         let maybe_a_string = Some("it's true, I'm a string".to_owned());
         let maybe_a_str = Some(&a_str[..]);
+        let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
+        let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
+        let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);
 
         let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
             a_bool: true,
@@ -115,6 +128,11 @@ mod tests {
             borrowed_maybe_a_string: &maybe_a_string,
             borrowed_maybe_a_str: &maybe_a_str,
             now: chrono::Utc::now().naive_local(),
+            byte_vec: vec![0x65, 0x66, 0x67],
+            maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
+            borrowed_byte_vec: &borrowed_byte_vec,
+            borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec,
+            borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec,
         }];
 
         let generated_schema = drs.as_slice().schema().unwrap();

Reply via email to