This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new a85fc030bb Support setting key field in MapBuilder (#7101)
a85fc030bb is described below
commit a85fc030bb6d3c4b7230073f7dd693aa5d7a2d6e
Author: Willi Raschkowski <[email protected]>
AuthorDate: Wed Feb 12 10:27:01 2025 +0000
Support setting key field in MapBuilder (#7101)
* Support setting key field metadata in MapBuilder
* Return ArrowError for nullable keys
* Panic instead of returning Result
* Update doc
* Move non-nullable keys field validation into finish
* Oops, update doc after moving panic to finish
* Assert that keys_field type mismatch results in panic
* Update arrow-array/src/builder/map_builder.rs
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
arrow-array/src/builder/map_builder.rs | 100 +++++++++++++++++++++++++++---
arrow-array/src/builder/struct_builder.rs | 3 +-
2 files changed, 94 insertions(+), 9 deletions(-)
diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs
index 1d89d427aa..012a454e76 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -61,6 +61,7 @@ pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
field_names: MapFieldNames,
key_builder: K,
value_builder: V,
+ key_field: Option<FieldRef>,
value_field: Option<FieldRef>,
}
@@ -107,13 +108,27 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
field_names: field_names.unwrap_or_default(),
key_builder,
value_builder,
+ key_field: None,
value_field: None,
}
}
/// Override the field passed to [`MapBuilder::new`]
///
- /// By default a nullable field is created with the name `values`
+ /// By default, a non-nullable field is created with the name `keys`
+ ///
+ /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
+ /// field's data type does not match that of `K` or the field is nullable
+ pub fn with_keys_field(self, field: impl Into<FieldRef>) -> Self {
+ Self {
+ key_field: Some(field.into()),
+ ..self
+ }
+ }
+
+ /// Override the field passed to [`MapBuilder::new`]
+ ///
+ /// By default, a nullable field is created with the name `values`
///
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
/// field's data type does not match that of `V`
@@ -194,11 +209,17 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
keys_arr.null_count()
);
- let keys_field = Arc::new(Field::new(
- self.field_names.key.as_str(),
- keys_arr.data_type().clone(),
- false, // always non-nullable
- ));
+ let keys_field = match &self.key_field {
+ Some(f) => {
+ assert!(!f.is_nullable(), "Keys field must not be nullable");
+ f.clone()
+ }
+ None => Arc::new(Field::new(
+ self.field_names.key.as_str(),
+ keys_arr.data_type().clone(),
+ false, // always non-nullable
+ )),
+ };
let values_field = match &self.value_field {
Some(f) => f.clone(),
None => Arc::new(Field::new(
@@ -262,10 +283,10 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
#[cfg(test)]
mod tests {
+ use super::*;
use crate::builder::{make_builder, Int32Builder, StringBuilder};
use crate::{Int32Array, StringArray};
-
- use super::*;
+ use std::collections::HashMap;
#[test]
#[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
@@ -377,4 +398,67 @@ mod tests {
)
);
}
+
+ #[test]
+ fn test_with_keys_field() {
+ let mut key_metadata = HashMap::new();
+ key_metadata.insert("foo".to_string(), "bar".to_string());
+ let key_field = Arc::new(
+ Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()),
+ );
+ let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
+ .with_keys_field(key_field.clone());
+ builder.keys().append_value(1);
+ builder.values().append_value(2);
+ builder.append(true).unwrap();
+ let map = builder.finish();
+
+ assert_eq!(map.len(), 1);
+ assert_eq!(
+ map.data_type(),
+ &DataType::Map(
+ Arc::new(Field::new(
+ "entries",
+ DataType::Struct(
+ vec![
+ Arc::new(
+ Field::new("keys", DataType::Int32, false)
+ .with_metadata(key_metadata)
+ ),
+ Arc::new(Field::new("values", DataType::Int32, true))
+ ]
+ .into()
+ ),
+ false,
+ )),
+ false
+ )
+ );
+ }
+
+ #[test]
+ #[should_panic(expected = "Keys field must not be nullable")]
+ fn test_with_nullable_keys_field() {
+ let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
+ .with_keys_field(Arc::new(Field::new("keys", DataType::Int32, true)));
+
+ builder.keys().append_value(1);
+ builder.values().append_value(2);
+ builder.append(true).unwrap();
+
+ builder.finish();
+ }
+
+ #[test]
+ #[should_panic(expected = "Incorrect datatype")]
+ fn test_keys_field_type_mismatch() {
+ let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
+ .with_keys_field(Arc::new(Field::new("keys", DataType::Utf8, false)));
+
+ builder.keys().append_value(1);
+ builder.values().append_value(2);
+ builder.append(true).unwrap();
+
+ builder.finish();
+ }
}
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 4a40c22017..5cebc6485e 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -296,10 +296,11 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
value_builder,
capacity,
)
+ .with_keys_field(fields[0].clone())
.with_values_field(fields[1].clone()),
)
}
- t => panic!("The field of Map data type {t:?} should has a child Struct field"),
+ t => panic!("The field of Map data type {t:?} should have a child Struct field"),
},
DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
t @ DataType::Dictionary(key_type, value_type) => {