This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 7bf7ea5e3 Added support for byte vectors and slices to parquet_derive (#3864) (#3878)
7bf7ea5e3 is described below
commit 7bf7ea5e341c15dbd8653b16413459f5fa4784eb
Author: waymost <[email protected]>
AuthorDate: Sat Mar 18 05:02:31 2023 -0700
Added support for byte vectors and slices to parquet_derive (#3864) (#3878)
---
parquet/src/data_type.rs | 10 +++++
parquet_derive/src/parquet_field.rs | 76 ++++++++++++++++++++++++++++++++-----
parquet_derive_test/src/lib.rs | 18 +++++++++
3 files changed, 95 insertions(+), 9 deletions(-)
diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 40d54c78e..48ee7f89f 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -199,6 +199,16 @@ impl From<Vec<u8>> for ByteArray {
}
}
+impl<'a> From<&'a [u8]> for ByteArray {
+ fn from(b: &'a [u8]) -> ByteArray {
+ let mut v = Vec::new();
+ v.extend_from_slice(b);
+ Self {
+ data: Some(ByteBufferPtr::new(v)),
+ }
+ }
+}
+
impl<'a> From<&'a str> for ByteArray {
fn from(s: &'a str) -> ByteArray {
let mut v = Vec::new();
diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs
index 48b6d3ac4..ea6878283 100644
--- a/parquet_derive/src/parquet_field.rs
+++ b/parquet_derive/src/parquet_field.rs
@@ -92,6 +92,10 @@ impl Field {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
+ Type::Vec(ref first_type) => match **first_type {
+ Type::TypePath(_) => self.option_into_vals(),
+ _ => unimplemented!("Unsupported type encountered"),
+ },
ref f => unimplemented!("Unsupported: {:#?}", f),
},
Type::Reference(_, ref first_type) => match **first_type {
@@ -99,11 +103,27 @@ impl Field {
Type::Option(ref second_type) => match **second_type {
Type::TypePath(_) => self.option_into_vals(),
Type::Reference(_, ref second_type) => match **second_type {
+ Type::TypePath(_) => self.option_into_vals(),
+ Type::Slice(ref second_type) => match **second_type {
+ Type::TypePath(_) => self.option_into_vals(),
+ ref f => unimplemented!("Unsupported: {:#?}", f),
+ },
+ _ => unimplemented!("Unsupported type encountered"),
+ },
+ Type::Vec(ref first_type) => match **first_type {
Type::TypePath(_) => self.option_into_vals(),
_ => unimplemented!("Unsupported type encountered"),
},
ref f => unimplemented!("Unsupported: {:#?}", f),
},
+ Type::Slice(ref second_type) => match **second_type {
+ Type::TypePath(_) => self.copied_direct_vals(),
+ ref f => unimplemented!("Unsupported: {:#?}", f),
+ },
+ ref f => unimplemented!("Unsupported: {:#?}", f),
+ },
+ Type::Vec(ref first_type) => match **first_type {
+ Type::TypePath(_) => self.copied_direct_vals(),
ref f => unimplemented!("Unsupported: {:#?}", f),
},
f => unimplemented!("Unsupported: {:#?}", f),
@@ -116,26 +136,55 @@ impl Field {
Type::Option(_) => unimplemented!("Unsupported nesting encountered"),
Type::Reference(_, ref second_type)
| Type::Vec(ref second_type)
- | Type::Array(ref second_type) => match **second_type {
+ | Type::Array(ref second_type)
+ | Type::Slice(ref second_type) => match **second_type {
Type::TypePath(_) => Some(self.optional_definition_levels()),
_ => unimplemented!("Unsupported nesting encountered"),
},
},
Type::Reference(_, ref first_type)
| Type::Vec(ref first_type)
- | Type::Array(ref first_type) => match **first_type {
+ | Type::Array(ref first_type)
+ | Type::Slice(ref first_type) => match **first_type {
Type::TypePath(_) => None,
- Type::Reference(_, ref second_type)
- | Type::Vec(ref second_type)
+ Type::Vec(ref second_type)
| Type::Array(ref second_type)
- | Type::Option(ref second_type) => match **second_type {
- Type::TypePath(_) => Some(self.optional_definition_levels()),
+ | Type::Slice(ref second_type) => match **second_type {
+ Type::TypePath(_) => None,
Type::Reference(_, ref third_type) => match **third_type {
- Type::TypePath(_) => Some(self.optional_definition_levels()),
+ Type::TypePath(_) => None,
_ => unimplemented!("Unsupported definition encountered"),
},
_ => unimplemented!("Unsupported definition encountered"),
},
+ Type::Reference(_, ref second_type) | Type::Option(ref second_type) => {
+ match **second_type {
+ Type::TypePath(_) => Some(self.optional_definition_levels()),
+ Type::Vec(ref third_type)
+ | Type::Array(ref third_type)
+ | Type::Slice(ref third_type) => match **third_type {
+ Type::TypePath(_) => Some(self.optional_definition_levels()),
+ Type::Reference(_, ref fourth_type) => match **fourth_type {
+ Type::TypePath(_) => {
+ Some(self.optional_definition_levels())
+ }
+ _ => unimplemented!("Unsupported definition encountered"),
+ },
+ _ => unimplemented!("Unsupported definition encountered"),
+ },
+ Type::Reference(_, ref third_type) => match **third_type {
+ Type::TypePath(_) => Some(self.optional_definition_levels()),
+ Type::Slice(ref fourth_type) => match **fourth_type {
+ Type::TypePath(_) => {
+ Some(self.optional_definition_levels())
+ }
+ _ => unimplemented!("Unsupported definition encountered"),
+ },
+ _ => unimplemented!("Unsupported definition encountered"),
+ },
+ _ => unimplemented!("Unsupported definition encountered"),
+ }
+ }
},
};
@@ -323,6 +372,7 @@ impl Field {
enum Type {
Array(Box<Type>),
Option(Box<Type>),
+ Slice(Box<Type>),
Vec(Box<Type>),
TypePath(syn::Type),
Reference(Option<syn::Lifetime>, Box<Type>),
@@ -374,6 +424,7 @@ impl Type {
Type::Option(ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type)
+ | Type::Slice(ref first_type)
| Type::Reference(_, ref first_type) => {
Type::leaf_type_recursive_helper(first_type, Some(ty))
}
@@ -391,6 +442,7 @@ impl Type {
Type::Option(ref first_type)
| Type::Vec(ref first_type)
| Type::Array(ref first_type)
+ | Type::Slice(ref first_type)
| Type::Reference(_, ref first_type) => match **first_type {
Type::TypePath(ref type_) => type_,
_ => unimplemented!("leaf_type() should only return shallow types"),
@@ -443,7 +495,7 @@ impl Type {
}
}
}
- Type::Vec(ref first_type) => {
+ Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
if let Type::TypePath(_) = **first_type {
if last_part == "u8" {
return BasicType::BYTE_ARRAY;
@@ -484,7 +536,7 @@ impl Type {
}
}
}
- Type::Vec(ref first_type) => {
+ Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
if let Type::TypePath(_) = **first_type {
if last_part == "u8" {
return quote! { None };
@@ -572,6 +624,7 @@ impl Type {
syn::Type::Path(ref p) => Type::from_type_path(f, p),
syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr),
syn::Type::Array(ref ta) => Type::from_type_array(f, ta),
+ syn::Type::Slice(ref ts) => Type::from_type_slice(f, ts),
other => unimplemented!(
"Unable to derive {:?} - it is currently an unsupported type\n{:#?}",
f.ident.as_ref().unwrap(),
@@ -622,6 +675,11 @@ impl Type {
let inner_type = Type::from_type(f, ta.elem.as_ref());
Type::Array(Box::new(inner_type))
}
+
+ fn from_type_slice(f: &syn::Field, ts: &syn::TypeSlice) -> Self {
+ let inner_type = Type::from_type(f, ts.elem.as_ref());
+ Type::Slice(Box::new(inner_type))
+ }
}
#[cfg(test)]
diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs
index 746644793..2aa174974 100644
--- a/parquet_derive_test/src/lib.rs
+++ b/parquet_derive_test/src/lib.rs
@@ -42,6 +42,11 @@ struct ACompleteRecord<'a> {
pub borrowed_maybe_a_string: &'a Option<String>,
pub borrowed_maybe_a_str: &'a Option<&'a str>,
pub now: chrono::NaiveDateTime,
+ pub byte_vec: Vec<u8>,
+ pub maybe_byte_vec: Option<Vec<u8>>,
+ pub borrowed_byte_vec: &'a [u8],
+ pub borrowed_maybe_byte_vec: &'a Option<Vec<u8>>,
+ pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>,
}
#[cfg(test)]
@@ -84,6 +89,11 @@ mod tests {
OPTIONAL BINARY borrowed_maybe_a_string (STRING);
OPTIONAL BINARY borrowed_maybe_a_str (STRING);
REQUIRED INT64 now (TIMESTAMP_MILLIS);
+ REQUIRED BINARY byte_vec;
+ OPTIONAL BINARY maybe_byte_vec;
+ REQUIRED BINARY borrowed_byte_vec;
+ OPTIONAL BINARY borrowed_maybe_byte_vec;
+ OPTIONAL BINARY borrowed_maybe_borrowed_byte_vec;
}";
let schema = Arc::new(parse_message_type(schema_str).unwrap());
@@ -92,6 +102,9 @@ mod tests {
let a_borrowed_string = "cool news".to_owned();
let maybe_a_string = Some("it's true, I'm a string".to_owned());
let maybe_a_str = Some(&a_str[..]);
+ let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
+ let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
+ let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);
let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
a_bool: true,
@@ -115,6 +128,11 @@ mod tests {
borrowed_maybe_a_string: &maybe_a_string,
borrowed_maybe_a_str: &maybe_a_str,
now: chrono::Utc::now().naive_local(),
+ byte_vec: vec![0x65, 0x66, 0x67],
+ maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
+ borrowed_byte_vec: &borrowed_byte_vec,
+ borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec,
+ borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec,
}];
let generated_schema = drs.as_slice().schema().unwrap();